HDFS-13898. [SBN read] Throw retriable exception for getBlockLocations when ObserverNameNode is in safemode. Contributed by Chao Sun.
This commit is contained in:
parent
07558629db
commit
d4adf921a3
|
@ -91,6 +91,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_DIFF_LI
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_DIFF_LISTING_LIMIT_DEFAULT;
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_DIFF_LISTING_LIMIT_DEFAULT;
|
||||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
||||||
import static org.apache.hadoop.hdfs.server.namenode.FSDirStatAndListingOp.*;
|
import static org.apache.hadoop.hdfs.server.namenode.FSDirStatAndListingOp.*;
|
||||||
|
import static org.apache.hadoop.ha.HAServiceProtocol.HAServiceState.ACTIVE;
|
||||||
|
import static org.apache.hadoop.ha.HAServiceProtocol.HAServiceState.OBSERVER;
|
||||||
|
|
||||||
import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicyInfo;
|
import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicyInfo;
|
||||||
import org.apache.hadoop.hdfs.protocol.OpenFilesIterator.OpenFilesType;
|
import org.apache.hadoop.hdfs.protocol.OpenFilesIterator.OpenFilesType;
|
||||||
|
@ -1947,7 +1949,8 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
|
||||||
SafeModeException se = newSafemodeException(
|
SafeModeException se = newSafemodeException(
|
||||||
"Zero blocklocations for " + srcArg);
|
"Zero blocklocations for " + srcArg);
|
||||||
if (haEnabled && haContext != null &&
|
if (haEnabled && haContext != null &&
|
||||||
haContext.getState().getServiceState() == HAServiceState.ACTIVE) {
|
(haContext.getState().getServiceState() == ACTIVE ||
|
||||||
|
haContext.getState().getServiceState() == OBSERVER)) {
|
||||||
throw new RetriableException(se);
|
throw new RetriableException(se);
|
||||||
} else {
|
} else {
|
||||||
throw se;
|
throw se;
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hdfs.server.namenode;
|
package org.apache.hadoop.hdfs.server.namenode;
|
||||||
|
|
||||||
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports;
|
import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports;
|
||||||
import static org.mockito.Mockito.spy;
|
import static org.mockito.Mockito.spy;
|
||||||
|
|
||||||
|
@ -223,6 +224,12 @@ public class NameNodeAdapter {
|
||||||
return fsnSpy;
|
return fsnSpy;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static BlockManager spyOnBlockManager(NameNode nn) {
|
||||||
|
BlockManager bmSpy = Mockito.spy(nn.getNamesystem().getBlockManager());
|
||||||
|
nn.getNamesystem().setBlockManagerForTesting(bmSpy);
|
||||||
|
return bmSpy;
|
||||||
|
}
|
||||||
|
|
||||||
public static ReentrantReadWriteLock spyOnFsLock(FSNamesystem fsn) {
|
public static ReentrantReadWriteLock spyOnFsLock(FSNamesystem fsn) {
|
||||||
ReentrantReadWriteLock spy = Mockito.spy(fsn.getFsLockForTests());
|
ReentrantReadWriteLock spy = Mockito.spy(fsn.getFsLockForTests());
|
||||||
fsn.setFsLockForTests(spy);
|
fsn.setFsLockForTests(spy);
|
||||||
|
|
|
@ -24,8 +24,15 @@ import org.apache.hadoop.fs.permission.FsPermission;
|
||||||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
||||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.Block;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
||||||
import org.apache.hadoop.hdfs.qjournal.MiniQJMHACluster;
|
import org.apache.hadoop.hdfs.qjournal.MiniQJMHACluster;
|
||||||
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
|
||||||
import org.apache.hadoop.io.retry.FailoverProxyProvider;
|
import org.apache.hadoop.io.retry.FailoverProxyProvider;
|
||||||
import org.apache.hadoop.io.retry.RetryInvocationHandler;
|
import org.apache.hadoop.io.retry.RetryInvocationHandler;
|
||||||
import org.apache.hadoop.test.GenericTestUtils;
|
import org.apache.hadoop.test.GenericTestUtils;
|
||||||
|
@ -38,9 +45,12 @@ import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.lang.reflect.Proxy;
|
import java.lang.reflect.Proxy;
|
||||||
import java.net.URI;
|
import java.net.URI;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY;
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY;
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY;
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY;
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_TAILEDITS_INPROGRESS_KEY;
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_TAILEDITS_INPROGRESS_KEY;
|
||||||
|
@ -48,6 +58,13 @@ import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
import static org.junit.Assert.fail;
|
import static org.junit.Assert.fail;
|
||||||
|
|
||||||
|
import static org.mockito.Matchers.any;
|
||||||
|
import static org.mockito.Matchers.anyBoolean;
|
||||||
|
import static org.mockito.Matchers.anyLong;
|
||||||
|
import static org.mockito.Matchers.anyShort;
|
||||||
|
import static org.mockito.Matchers.anyString;
|
||||||
|
import static org.mockito.Mockito.doAnswer;
|
||||||
|
import static org.mockito.Mockito.doNothing;
|
||||||
|
|
||||||
// Main unit tests for ObserverNode
|
// Main unit tests for ObserverNode
|
||||||
public class TestObserverNode {
|
public class TestObserverNode {
|
||||||
|
@ -299,6 +316,56 @@ public class TestObserverNode {
|
||||||
assertEquals(0, rc);
|
assertEquals(0, rc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test the case where Observer should throw RetriableException, just like
|
||||||
|
* active NN, for certain open() calls where block locations are not
|
||||||
|
* available. See HDFS-13898 for details.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testObserverNodeSafeModeWithBlockLocations() throws Exception {
|
||||||
|
setUpCluster(1);
|
||||||
|
setObserverRead(true);
|
||||||
|
|
||||||
|
// Avoid starting DNs for the mini cluster.
|
||||||
|
BlockManager bmSpy = NameNodeAdapter.spyOnBlockManager(namenodes[0]);
|
||||||
|
doNothing().when(bmSpy)
|
||||||
|
.verifyReplication(anyString(), anyShort(), anyString());
|
||||||
|
|
||||||
|
// Create a new file - the request should go to active.
|
||||||
|
dfs.createNewFile(testPath);
|
||||||
|
assertSentTo(0);
|
||||||
|
|
||||||
|
rollEditLogAndTail(0);
|
||||||
|
dfs.open(testPath);
|
||||||
|
assertSentTo(2);
|
||||||
|
|
||||||
|
// Set observer to safe mode.
|
||||||
|
dfsCluster.getFileSystem(2).setSafeMode(SafeModeAction.SAFEMODE_ENTER);
|
||||||
|
|
||||||
|
// Mock block manager for observer to generate some fake blocks which
|
||||||
|
// will trigger the (retriable) safe mode exception.
|
||||||
|
final DatanodeInfo[] empty = {};
|
||||||
|
bmSpy = NameNodeAdapter.spyOnBlockManager(namenodes[2]);
|
||||||
|
doAnswer((invocation) -> {
|
||||||
|
ExtendedBlock b = new ExtendedBlock("fake-pool", new Block(12345L));
|
||||||
|
LocatedBlock fakeBlock = new LocatedBlock(b, empty);
|
||||||
|
List<LocatedBlock> fakeBlocks = new ArrayList<>();
|
||||||
|
fakeBlocks.add(fakeBlock);
|
||||||
|
return new LocatedBlocks(0, false, fakeBlocks, null, true, null, null);
|
||||||
|
}).when(bmSpy).createLocatedBlocks(any(), anyLong(), anyBoolean(),
|
||||||
|
anyLong(), anyLong(), anyBoolean(), anyBoolean(), any(), any());
|
||||||
|
|
||||||
|
// Open the file again - it should throw retriable exception and then
|
||||||
|
// failover to active.
|
||||||
|
dfs.open(testPath);
|
||||||
|
assertSentTo(0);
|
||||||
|
|
||||||
|
// Remove safe mode on observer, request should still go to it.
|
||||||
|
dfsCluster.getFileSystem(2).setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
|
||||||
|
dfs.open(testPath);
|
||||||
|
assertSentTo(2);
|
||||||
|
}
|
||||||
|
|
||||||
// TODO this does not currently work because fetching the service state from
|
// TODO this does not currently work because fetching the service state from
|
||||||
// e.g. the StandbyNameNode also waits for the transaction ID to catch up.
|
// e.g. the StandbyNameNode also waits for the transaction ID to catch up.
|
||||||
// This is disabled pending HDFS-13872 and HDFS-13749.
|
// This is disabled pending HDFS-13872 and HDFS-13749.
|
||||||
|
|
Loading…
Reference in New Issue