HDFS-2812. When becoming active, the NN should treat all leases as freshly renewed. Contributed by Todd Lipcon.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1233612 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Todd Lipcon 2012-01-19 22:35:04 +00:00
parent 02919e61f6
commit 1aed1296dd
5 changed files with 101 additions and 2 deletions

View File

@ -117,3 +117,5 @@ HDFS-2795. Standby NN takes a long time to recover from a dead DN starting up. (
HDFS-2592. Balancer support for HA namenodes. (Uma Maheswara Rao G via todd)
HDFS-2367. Enable the configuration of multiple HA cluster addresses. (atm)
HDFS-2812. When becoming active, the NN should treat all leases as freshly renewed. (todd)

View File

@ -337,6 +337,8 @@ private static final void logAuditEvent(UserGroupInformation ugi,
*/ */
private HAContext haContext; private HAContext haContext;
private boolean haEnabled;
private final Configuration conf; private final Configuration conf;
PendingDataNodeMessages getPendingDataNodeMessages() { PendingDataNodeMessages getPendingDataNodeMessages() {
@ -545,6 +547,13 @@ void startActiveServices() throws IOException {
if (UserGroupInformation.isSecurityEnabled()) { if (UserGroupInformation.isSecurityEnabled()) {
startSecretManager(); startSecretManager();
} }
if (haEnabled) {
// Renew all of the leases before becoming active.
// This is because, while we were in standby mode,
// the leases weren't getting renewed on this NN.
// Give them all a fresh start here.
leaseManager.renewAllLeases();
}
leaseManager.startMonitor(); leaseManager.startMonitor();
} finally { } finally {
writeUnlock(); writeUnlock();
@ -737,8 +746,8 @@ private void setConfigurationParameters(Configuration conf)
// block allocation has to be persisted in HA using a shared edits directory // block allocation has to be persisted in HA using a shared edits directory
// so that the standby has up-to-date namespace information // so that the standby has up-to-date namespace information
String nameserviceId = DFSUtil.getNamenodeNameServiceId(conf); String nameserviceId = DFSUtil.getNamenodeNameServiceId(conf);
this.persistBlocks |= HAUtil.isHAEnabled(conf, nameserviceId) && this.haEnabled = HAUtil.isHAEnabled(conf, nameserviceId);
HAUtil.usesSharedEditsDir(conf); this.persistBlocks |= haEnabled && HAUtil.usesSharedEditsDir(conf);
short filePermission = (short)conf.getInt(DFS_NAMENODE_UPGRADE_PERMISSION_KEY, short filePermission = (short)conf.getInt(DFS_NAMENODE_UPGRADE_PERMISSION_KEY,
DFS_NAMENODE_UPGRADE_PERMISSION_DEFAULT); DFS_NAMENODE_UPGRADE_PERMISSION_DEFAULT);

View File

@ -200,6 +200,15 @@ synchronized void renewLease(Lease lease) {
} }
} }
/**
 * Renews every lease currently tracked in {@code leases} by handing each
 * one to {@link #renewLease(Lease)}.
 *
 * The method is synchronized, so the whole pass runs while holding this
 * object's monitor.
 */
synchronized void renewAllLeases() {
  for (Lease openLease : leases.values()) {
    renewLease(openLease);
  }
}
/************************************************************ /************************************************************
* A Lease governs all the locks held by a single client. * A Lease governs all the locks held by a single client.
* For each client there's a corresponding lease, whose * For each client there's a corresponding lease, whose
@ -306,6 +315,11 @@ void replacePath(String oldpath, String newpath) {
paths.remove(oldpath); paths.remove(oldpath);
paths.add(newpath); paths.add(newpath);
} }
/**
 * Exposes this lease's {@code lastUpdate} value so that tests can
 * observe when the lease was last refreshed.
 *
 * @return the current value of the {@code lastUpdate} field
 */
@VisibleForTesting
long getLastUpdate() {
  return this.lastUpdate;
}
} }
synchronized void changeLease(String src, String dst, synchronized void changeLease(String src, String dst,

View File

@ -28,6 +28,7 @@
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.MkdirOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.MkdirOp;
import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse; import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse;
import org.apache.hadoop.ipc.Server; import org.apache.hadoop.ipc.Server;
@ -126,6 +127,19 @@ public static String getLeaseHolderForPath(NameNode namenode, String path) {
return namenode.getNamesystem().leaseManager.getLeaseByPath(path).getHolder(); return namenode.getNamesystem().leaseManager.getLeaseByPath(path).getHolder();
} }
/**
 * Looks up the lease covering {@code path} in the given NameNode's lease
 * manager and reports its last-update value.
 *
 * @param nn the NameNode whose lease manager is consulted
 * @param path the path whose lease is looked up
 * @return the value of {@code getLastUpdate()} for the lease found for
 * {@code path}, or -1 when no lease is found
 */
public static long getLeaseRenewalTime(NameNode nn, String path) {
  Lease lease = nn.getNamesystem().leaseManager.getLeaseByPath(path);
  // No lease for this path: report the sentinel instead of dereferencing null.
  return (lease == null) ? -1 : lease.getLastUpdate();
}
/** /**
* Return the datanode descriptor for the given datanode. * Return the datanode descriptor for the given datanode.
*/ */

View File

@ -24,15 +24,19 @@
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSNNTopology; import org.apache.hadoop.hdfs.MiniDFSNNTopology;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.test.MultithreadedTestUtil.TestContext; import org.apache.hadoop.test.MultithreadedTestUtil.TestContext;
import org.apache.hadoop.test.MultithreadedTestUtil.RepeatingTestThread; import org.apache.hadoop.test.MultithreadedTestUtil.RepeatingTestThread;
import org.apache.tools.ant.taskdefs.WaitFor;
import org.junit.Test; import org.junit.Test;
import org.mockito.Mockito; import org.mockito.Mockito;
@ -45,6 +49,7 @@ public class TestHAStateTransitions {
TestStandbyIsHot.class); TestStandbyIsHot.class);
private static final Path TEST_DIR = new Path("/test"); private static final Path TEST_DIR = new Path("/test");
private static final Path TEST_FILE_PATH = new Path(TEST_DIR, "foo"); private static final Path TEST_FILE_PATH = new Path(TEST_DIR, "foo");
private static final String TEST_FILE_STR = TEST_FILE_PATH.toUri().getPath();
private static final String TEST_FILE_DATA = private static final String TEST_FILE_DATA =
"Hello state transitioning world"; "Hello state transitioning world";
@ -191,4 +196,59 @@ public void doAnAction() throws Exception {
cluster.shutdown(); cluster.shutdown();
} }
} }
/**
 * Test for HDFS-2812. Since lease renewals go from the client
 * only to the active NN, the SBN will have out-of-date lease
 * info when it becomes active. We need to make sure we don't
 * accidentally mark the leases as expired when the failover
 * proceeds.
 *
 * The test creates a file while NN 0 is active, waits for the standby
 * to catch up, fails over to NN 1, and checks that the lease renewal
 * time reported by NN 1 increased across the failover.
 */
@Test(timeout=120000)
public void testLeasesRenewedOnTransition() throws Exception {
  Configuration conf = new Configuration();
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .nnTopology(MiniDFSNNTopology.simpleHATopology())
      .numDataNodes(1)
      .build();
  FSDataOutputStream stm = null;
  try {
    // All setup after build() lives inside the try block so that the
    // cluster is always shut down, even if one of these calls throws.
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    NameNode nn0 = cluster.getNameNode(0);
    NameNode nn1 = cluster.getNameNode(1);
    // NOTE(review): presumably this shortens the standby's edit-log
    // tailing interval and wakes the tailer so the new interval takes
    // effect right away -- confirm against EditLogTailer.
    nn1.getNamesystem().getEditLogTailer().setSleepTime(250);
    nn1.getNamesystem().getEditLogTailer().interrupt();

    cluster.waitActive();
    cluster.transitionToActive(0);

    LOG.info("Starting with NN 0 active");

    // Creating the file should produce a lease on nn0 only.
    stm = fs.create(TEST_FILE_PATH);
    long nn0t0 = NameNodeAdapter.getLeaseRenewalTime(nn0, TEST_FILE_STR);
    assertTrue(nn0t0 > 0);
    long nn1t0 = NameNodeAdapter.getLeaseRenewalTime(nn1, TEST_FILE_STR);
    assertEquals("Lease should not yet exist on nn1",
        -1, nn1t0);

    Thread.sleep(5); // make sure time advances!

    // Once the standby has caught up, it should hold its own copy of the
    // lease, stamped later than the original on nn0.
    HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
    long nn1t1 = NameNodeAdapter.getLeaseRenewalTime(nn1, TEST_FILE_STR);
    assertTrue("Lease should have been created on standby. Time was: " +
        nn1t1, nn1t1 > nn0t0);

    Thread.sleep(5); // make sure time advances!

    LOG.info("Failing over to NN 1");
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);

    // Becoming active must have refreshed the lease on nn1.
    long nn1t2 = NameNodeAdapter.getLeaseRenewalTime(nn1, TEST_FILE_STR);
    assertTrue("Lease should have been renewed by failover process",
        nn1t2 > nn1t1);
  } finally {
    IOUtils.closeStream(stm);
    cluster.shutdown();
  }
}
} }