From b4716f96d26ec105ed0bee230ef8dc4733ce9876 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Wed, 13 Mar 2013 19:56:03 +0000 Subject: [PATCH] HDFS-4591. HA clients can fail to fail over while Standby NN is performing long checkpoint. Contributed by Aaron T. Myers. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1456109 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/hadoop/test/GenericTestUtils.java | 13 ++++ hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + .../DelegationTokenSecretManager.java | 1 + .../server/blockmanagement/BlockManager.java | 4 +- .../hdfs/server/namenode/FSNamesystem.java | 71 +++++++++++++++++-- .../server/namenode/NameNodeRpcServer.java | 6 -- .../hdfs/server/namenode/Namesystem.java | 4 ++ .../hdfs/server/namenode/ha/HAContext.java | 14 +++- .../namenode/ha/TestStandbyCheckpoints.java | 51 +++++++++++++ .../server/namenode/ha/TestStandbyIsHot.java | 1 + 10 files changed, 151 insertions(+), 17 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java index dd0c9556d6e..bfb52a8f928 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java @@ -162,6 +162,9 @@ public abstract class GenericTestUtils { private final CountDownLatch waitLatch = new CountDownLatch(1); private final CountDownLatch resultLatch = new CountDownLatch(1); + private final AtomicInteger fireCounter = new AtomicInteger(0); + private final AtomicInteger resultCounter = new AtomicInteger(0); + // Result fields set after proceed() is called. private volatile Throwable thrown; private volatile Object returnValue; @@ -188,6 +191,7 @@ public abstract class GenericTestUtils { @Override public Object answer(InvocationOnMock invocation) throws Throwable { LOG.info("DelayAnswer firing fireLatch"); + fireCounter.getAndIncrement(); fireLatch.countDown(); try { LOG.info("DelayAnswer waiting on waitLatch"); @@ -208,6 +212,7 @@ public abstract class GenericTestUtils { thrown = t; throw t; } finally { + resultCounter.incrementAndGet(); resultLatch.countDown(); } } @@ -235,6 +240,14 @@ public abstract class GenericTestUtils { public Object getReturnValue() { return returnValue; } + + public int getFireCount() { + return fireCounter.get(); + } + + public int getResultCount() { + return resultCounter.get(); + } } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 6af35979bbe..782a070ae22 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -79,6 +79,9 @@ Release 2.0.5-beta - UNRELEASED HDFS-4583. TestNodeCount fails. (Ivan Mitic via suresh) + HDFS-4591. HA clients can fail to fail over while Standby NN is performing + long checkpoint. (atm) + Release 2.0.4-alpha - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java index a25ba5b52fd..d18ac9f456d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java @@ -78,6 +78,7 @@ public class DelegationTokenSecretManager @Override //SecretManager public void checkAvailableForRead() throws StandbyException { + namesystem.checkOperation(OperationCategory.READ); namesystem.readLock(); try { namesystem.checkOperation(OperationCategory.READ); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index c3b98692f63..b14f1379776 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -62,6 +62,7 @@ import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState; import org.apache.hadoop.hdfs.server.namenode.FSClusterStats; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.Namesystem; +import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory; import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics; import org.apache.hadoop.hdfs.server.protocol.BlockCommand; import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations; @@ -874,9 +875,10 @@ public class BlockManager { */ public BlocksWithLocations getBlocks(DatanodeID datanode, long size ) throws IOException { + namesystem.checkOperation(OperationCategory.READ); namesystem.readLock(); try { - namesystem.checkSuperuserPrivilege(); + namesystem.checkOperation(OperationCategory.READ); return getBlocksWithLocations(datanode, size); } finally { namesystem.readUnlock(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 9c27035bc1f..0fee1e0c50c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -1081,8 +1081,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, */ void metaSave(String filename) throws IOException { checkSuperuserPrivilege(); + checkOperation(OperationCategory.UNCHECKED); writeLock(); try { + checkOperation(OperationCategory.UNCHECKED); File file = new File(System.getProperty("hadoop.log.dir"), filename); PrintWriter out = new PrintWriter(new BufferedWriter( new OutputStreamWriter(new FileOutputStream(file, true), Charsets.UTF_8))); @@ -1155,6 +1157,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, UnresolvedLinkException, IOException { HdfsFileStatus resultingStat = null; FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -1192,6 +1195,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, UnresolvedLinkException, IOException { HdfsFileStatus resultingStat = null; FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -1302,13 +1306,20 @@ public class FSNamesystem implements Namesystem, FSClusterStats, throws FileNotFoundException, UnresolvedLinkException, IOException { for (int attempt = 0; attempt < 2; attempt++) { - if (attempt == 0) { // first attempt is with readlock + boolean isReadOp = (attempt == 0); + if (isReadOp) { // first attempt is with readlock + checkOperation(OperationCategory.READ); readLock(); } else { // second attempt is with write lock + checkOperation(OperationCategory.WRITE); writeLock(); // writelock is needed to set accesstime } try { - checkOperation(OperationCategory.READ); + if (isReadOp) { + checkOperation(OperationCategory.READ); + } else { + checkOperation(OperationCategory.WRITE); + } // if the namenode is in safemode, then do not update access time if (isInSafeMode()) { @@ -1321,7 +1332,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, if (now <= inode.getAccessTime() + getAccessTimePrecision()) { // if we have to set access time but we only have the readlock, then // restart this entire operation with the writeLock. - if (attempt == 0) { + if (isReadOp) { continue; } } @@ -1331,7 +1342,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, inode.computeFileSize(false), inode.isUnderConstruction(), offset, length, needBlockToken); } finally { - if (attempt == 0) { + if (isReadOp) { readUnlock(); } else { writeUnlock(); @@ -1387,6 +1398,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, HdfsFileStatus resultingStat = null; FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -1529,6 +1541,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } HdfsFileStatus resultingStat = null; FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -1569,6 +1582,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, throws IOException, UnresolvedLinkException { HdfsFileStatus resultingStat = null; FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -1644,6 +1658,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, blockManager.verifyReplication(src, replication, null); final boolean isFile; FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -1674,6 +1689,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, long getPreferredBlockSize(String filename) throws IOException, UnresolvedLinkException { FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.READ); readLock(); try { checkOperation(OperationCategory.READ); @@ -1733,6 +1749,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, FileNotFoundException, ParentNotDirectoryException, IOException { boolean skipSync = false; FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -1927,6 +1944,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, throws IOException { boolean skipSync = false; FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -2064,6 +2082,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } LocatedBlock lb = null; FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -2133,8 +2152,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } // Part I. Analyze the state of the file with respect to the input data. + checkOperation(OperationCategory.READ); readLock(); try { + checkOperation(OperationCategory.READ); LocatedBlock[] onRetryBlock = new LocatedBlock[1]; final INode[] inodes = analyzeFileState( src, clientName, previous, onRetryBlock); @@ -2161,8 +2182,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, // Allocate a new block, add it to the INode and the BlocksMap. Block newBlock = null; long offset; + checkOperation(OperationCategory.WRITE); writeLock(); try { + checkOperation(OperationCategory.WRITE); // Run the full analysis again, since things could have changed // while chooseTarget() was executing. LocatedBlock[] onRetryBlock = new LocatedBlock[1]; @@ -2312,9 +2335,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, final DatanodeDescriptor clientnode; final long preferredblocksize; final List chosen; + checkOperation(OperationCategory.READ); readLock(); try { - checkOperation(OperationCategory.WRITE); + checkOperation(OperationCategory.READ); //check safe mode if (isInSafeMode()) { throw new SafeModeException("Cannot add datanode; src=" + src @@ -2354,6 +2378,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, boolean abandonBlock(ExtendedBlock b, String src, String holder) throws LeaseExpiredException, FileNotFoundException, UnresolvedLinkException, IOException { + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -2427,6 +2452,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, throws SafeModeException, UnresolvedLinkException, IOException { checkBlock(last); boolean success = false; + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -2594,6 +2620,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, " to " + dst); } FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -2650,6 +2677,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, + src + " to " + dst); } FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -2736,6 +2764,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, IOException { ArrayList collectedBlocks = new ArrayList(); FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -2854,6 +2883,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, StandbyException, IOException { HdfsFileStatus stat = null; FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.READ); readLock(); try { checkOperation(OperationCategory.READ); @@ -2896,6 +2926,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, NameNode.stateChangeLog.debug("DIR* NameSystem.mkdirs: " + src); } FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -2956,6 +2987,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, FileNotFoundException, UnresolvedLinkException, StandbyException { FSPermissionChecker pc = new FSPermissionChecker(fsOwnerShortUserName, supergroup); + checkOperation(OperationCategory.READ); readLock(); try { checkOperation(OperationCategory.READ); @@ -2976,6 +3008,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, void setQuota(String path, long nsQuota, long dsQuota) throws IOException, UnresolvedLinkException { checkSuperuserPrivilege(); + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -2999,6 +3032,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, void fsync(String src, String clientName, long lastBlockLength) throws IOException, UnresolvedLinkException { NameNode.stateChangeLog.info("BLOCK* fsync: " + src + " for " + clientName); + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -3203,6 +3237,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, String[] newtargetstorages) throws IOException, UnresolvedLinkException { String src = ""; + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -3306,6 +3341,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * Renew the lease(s) held by the given client */ void renewLease(String holder) throws IOException { + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -3347,6 +3383,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, throws AccessControlException, UnresolvedLinkException, IOException { DirectoryListing dl; FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.READ); readLock(); try { checkOperation(OperationCategory.READ); @@ -3633,10 +3670,12 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } DatanodeInfo[] datanodeReport(final DatanodeReportType type - ) throws AccessControlException { + ) throws AccessControlException, StandbyException { checkSuperuserPrivilege(); + checkOperation(OperationCategory.UNCHECKED); readLock(); try { + checkOperation(OperationCategory.UNCHECKED); final DatanodeManager dm = getBlockManager().getDatanodeManager(); final List results = dm.getDatanodeListForReport(type); @@ -3660,8 +3699,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, */ void saveNamespace() throws AccessControlException, IOException { checkSuperuserPrivilege(); + checkOperation(OperationCategory.UNCHECKED); readLock(); try { + checkOperation(OperationCategory.UNCHECKED); if (!isInSafeMode()) { throw new IOException("Safe mode should be turned ON " + "in order to create namespace image."); @@ -3679,10 +3720,13 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * * @throws AccessControlException if superuser privilege is violated. */ - boolean restoreFailedStorage(String arg) throws AccessControlException { + boolean restoreFailedStorage(String arg) throws AccessControlException, + StandbyException { checkSuperuserPrivilege(); + checkOperation(OperationCategory.UNCHECKED); writeLock(); try { + checkOperation(OperationCategory.UNCHECKED); // if it is disabled - enable it and vice versa. if(arg.equals("check")) @@ -3703,6 +3747,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, void finalizeUpgrade() throws IOException { checkSuperuserPrivilege(); + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -4442,6 +4487,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, CheckpointSignature rollEditLog() throws IOException { checkSuperuserPrivilege(); + checkOperation(OperationCategory.JOURNAL); writeLock(); try { checkOperation(OperationCategory.JOURNAL); @@ -4459,6 +4505,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, NamenodeRegistration bnReg, // backup node NamenodeRegistration nnReg) // active name-node throws IOException { + checkOperation(OperationCategory.CHECKPOINT); writeLock(); try { checkOperation(OperationCategory.CHECKPOINT); @@ -4477,6 +4524,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, void endCheckpoint(NamenodeRegistration registration, CheckpointSignature sig) throws IOException { + checkOperation(OperationCategory.CHECKPOINT); readLock(); try { checkOperation(OperationCategory.CHECKPOINT); @@ -4765,6 +4813,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * Client is reporting some bad block locations. */ void reportBadBlocks(LocatedBlock[] blocks) throws IOException { + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -4799,6 +4848,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, LocatedBlock updateBlockForPipeline(ExtendedBlock block, String clientName) throws IOException { LocatedBlock locatedBlock; + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -4830,6 +4880,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, void updatePipeline(String clientName, ExtendedBlock oldBlock, ExtendedBlock newBlock, DatanodeID[] newNodes) throws IOException { + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -4957,8 +5008,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, */ void releaseBackupNode(NamenodeRegistration registration) throws IOException { + checkOperation(OperationCategory.WRITE); writeLock(); try { + checkOperation(OperationCategory.WRITE); if(getFSImage().getStorage().getNamespaceID() != registration.getNamespaceID()) throw new IOException("Incompatible namespaceIDs: " @@ -4997,6 +5050,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, Collection listCorruptFileBlocks(String path, String[] cookieTab) throws IOException { checkSuperuserPrivilege(); + checkOperation(OperationCategory.READ); readLock(); try { checkOperation(OperationCategory.READ); @@ -5089,6 +5143,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, Token getDelegationToken(Text renewer) throws IOException { Token token; + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -5135,6 +5190,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, long renewDelegationToken(Token token) throws InvalidToken, IOException { long expiryTime; + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -5167,6 +5223,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, */ void cancelDelegationToken(Token token) throws IOException { + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index 2bf69c071b6..b806ab8040e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -325,7 +325,6 @@ class NameNodeRpcServer implements NamenodeProtocols { throw new IllegalArgumentException( "Unexpected not positive size: "+size); } - namesystem.checkOperation(OperationCategory.READ); namesystem.checkSuperuserPrivilege(); return namesystem.getBlockManager().getBlocks(datanode, size); } @@ -699,7 +698,6 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // ClientProtocol public DatanodeInfo[] getDatanodeReport(DatanodeReportType type) throws IOException { - namesystem.checkOperation(OperationCategory.UNCHECKED); DatanodeInfo results[] = namesystem.datanodeReport(type); if (results == null ) { throw new IOException("Cannot find datanode report"); @@ -724,19 +722,16 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // ClientProtocol public boolean restoreFailedStorage(String arg) throws IOException { - namesystem.checkOperation(OperationCategory.UNCHECKED); return namesystem.restoreFailedStorage(arg); } @Override // ClientProtocol public void saveNamespace() throws IOException { - namesystem.checkOperation(OperationCategory.UNCHECKED); namesystem.saveNamespace(); } @Override // ClientProtocol public long rollEdits() throws AccessControlException, IOException { - namesystem.checkOperation(OperationCategory.JOURNAL); CheckpointSignature sig = namesystem.rollEditLog(); return sig.getCurSegmentTxId(); } @@ -781,7 +776,6 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // ClientProtocol public void metaSave(String filename) throws IOException { - namesystem.checkOperation(OperationCategory.UNCHECKED); namesystem.metaSave(filename); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Namesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Namesystem.java index c453db561eb..c82e9155ed6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Namesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Namesystem.java @@ -18,7 +18,9 @@ package org.apache.hadoop.hdfs.server.namenode; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory; import org.apache.hadoop.hdfs.util.RwLock; +import org.apache.hadoop.ipc.StandbyException; import org.apache.hadoop.security.AccessControlException; /** Namesystem operations. */ @@ -38,4 +40,6 @@ public interface Namesystem extends RwLock, SafeMode { public boolean isGenStampInFuture(long generationStamp); public void adjustSafeModeBlockTotals(int deltaSafe, int deltaTotal); + + public void checkOperation(OperationCategory read) throws StandbyException; } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAContext.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAContext.java index b052e4ea9e3..823738798d6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAContext.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAContext.java @@ -64,9 +64,17 @@ public interface HAContext { void writeUnlock(); /** - * Verify that the given operation category is allowed in the - * current state. This is to allow NN implementations (eg BackupNode) - * to override it with node-specific handling. + * Verify that the given operation category is allowed in the current state. + * This is to allow NN implementations (eg BackupNode) to override it with + * node-specific handling. + * + * If the operation which is being checked will be taking the FSNS lock, it's + * advisable to check the operation category both immediately before and after + * taking the lock. This is because clients rely on the StandbyException + * thrown by this method in order to trigger client failover, and if a client + * first tries to contact the Standby NN, it could block for a long time if + * the Standby is holding the lock for a while, e.g. when performing a + * checkpoint. See HDFS-4591 for more details. */ void checkOperation(OperationCategory op) throws StandbyException; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java index c449acae564..04750685e0d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.server.namenode.ha; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import java.io.File; import java.io.IOException; @@ -26,6 +27,8 @@ import java.io.OutputStream; import java.net.URI; import java.util.List; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -43,7 +46,10 @@ import org.apache.hadoop.hdfs.util.Canceler; import org.apache.hadoop.io.compress.CompressionCodecFactory; import org.apache.hadoop.io.compress.CompressionOutputStream; import org.apache.hadoop.io.compress.GzipCodec; +import org.apache.hadoop.ipc.StandbyException; import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.test.GenericTestUtils.DelayAnswer; +import org.apache.hadoop.util.ThreadUtil; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -59,6 +65,8 @@ public class TestStandbyCheckpoints { protected MiniDFSCluster cluster; protected NameNode nn0, nn1; protected FileSystem fs; + + private static final Log LOG = LogFactory.getLog(TestStandbyCheckpoints.class); @SuppressWarnings("rawtypes") @Before @@ -230,6 +238,49 @@ public class TestStandbyCheckpoints { assertTrue(canceledOne); } + + /** + * Make sure that clients will receive StandbyExceptions even when a + * checkpoint is in progress on the SBN, and therefore the StandbyCheckpointer + * thread will have FSNS lock. Regression test for HDFS-4591. + */ + @Test(timeout=120000) + public void testStandbyExceptionThrownDuringCheckpoint() throws Exception { + + // Set it up so that we know when the SBN checkpoint starts and ends. + FSImage spyImage1 = NameNodeAdapter.spyOnFsImage(nn1); + DelayAnswer answerer = new DelayAnswer(LOG); + Mockito.doAnswer(answerer).when(spyImage1) + .saveNamespace(Mockito.any(FSNamesystem.class), + Mockito.any(Canceler.class)); + + // Perform some edits and wait for a checkpoint to start on the SBN. + doEdits(0, 2000); + nn0.getRpcServer().rollEditLog(); + answerer.waitForCall(); + answerer.proceed(); + assertTrue("SBN is not performing checkpoint but it should be.", + answerer.getFireCount() == 1 && answerer.getResultCount() == 0); + + // Make sure that the lock has actually been taken by the checkpointing + // thread. + ThreadUtil.sleepAtLeastIgnoreInterrupts(1000); + try { + // Perform an RPC to the SBN and make sure it throws a StandbyException. + nn1.getRpcServer().getFileInfo("/"); + fail("Should have thrown StandbyException, but instead succeeded."); + } catch (StandbyException se) { + GenericTestUtils.assertExceptionContains("is not supported", se); + } + + // Make sure that the checkpoint is still going on, implying that the client + // RPC to the SBN happened during the checkpoint. + assertTrue("SBN should have still been checkpointing.", + answerer.getFireCount() == 1 && answerer.getResultCount() == 0); + answerer.waitForResult(); + assertTrue("SBN should have finished checkpointing.", + answerer.getFireCount() == 1 && answerer.getResultCount() == 1); + } private void doEdits(int start, int stop) throws IOException { for (int i = start; i < stop; i++) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java index 041d754e154..ecd52437b0f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java @@ -143,6 +143,7 @@ public class TestStandbyIsHot { conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1024); // We read from the standby to watch block locations HAUtil.setAllowStandbyReads(conf, true); + conf.setLong(DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY, 0); conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) .nnTopology(MiniDFSNNTopology.simpleHATopology())