HDFS-2812. When becoming active, the NN should treat all leases as freshly renewed. Contributed by Todd Lipcon.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1233612 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Todd Lipcon 2012-01-19 22:35:04 +00:00
parent 02919e61f6
commit 1aed1296dd
5 changed files with 101 additions and 2 deletions

View File

@ -117,3 +117,5 @@ HDFS-2795. Standby NN takes a long time to recover from a dead DN starting up. (
HDFS-2592. Balancer support for HA namenodes. (Uma Maheswara Rao G via todd)
HDFS-2367. Enable the configuration of multiple HA cluster addresses. (atm)
HDFS-2812. When becoming active, the NN should treat all leases as freshly renewed. (todd)

View File

@ -337,6 +337,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
*/
private HAContext haContext;
private boolean haEnabled;
private final Configuration conf;
PendingDataNodeMessages getPendingDataNodeMessages() {
@ -545,6 +547,13 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
if (UserGroupInformation.isSecurityEnabled()) {
startSecretManager();
}
if (haEnabled) {
// Renew all of the leases before becoming active.
// This is because, while we were in standby mode,
// the leases weren't getting renewed on this NN.
// Give them all a fresh start here.
leaseManager.renewAllLeases();
}
leaseManager.startMonitor();
} finally {
writeUnlock();
@ -737,8 +746,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
// block allocation has to be persisted in HA using a shared edits directory
// so that the standby has up-to-date namespace information
String nameserviceId = DFSUtil.getNamenodeNameServiceId(conf);
this.persistBlocks |= HAUtil.isHAEnabled(conf, nameserviceId) &&
HAUtil.usesSharedEditsDir(conf);
this.haEnabled = HAUtil.isHAEnabled(conf, nameserviceId);
this.persistBlocks |= haEnabled && HAUtil.usesSharedEditsDir(conf);
short filePermission = (short)conf.getInt(DFS_NAMENODE_UPGRADE_PERMISSION_KEY,
DFS_NAMENODE_UPGRADE_PERMISSION_DEFAULT);

View File

@ -200,6 +200,15 @@ public class LeaseManager {
}
}
/**
* Renew all of the currently open leases.
*/
synchronized void renewAllLeases() {
for (Lease l : leases.values()) {
renewLease(l);
}
}
/************************************************************
* A Lease governs all the locks held by a single client.
* For each client there's a corresponding lease, whose
@ -306,6 +315,11 @@ public class LeaseManager {
paths.remove(oldpath);
paths.add(newpath);
}
@VisibleForTesting
long getLastUpdate() {
return lastUpdate;
}
}
synchronized void changeLease(String src, String dst,

View File

@ -28,6 +28,7 @@ import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.MkdirOp;
import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse;
import org.apache.hadoop.ipc.Server;
@ -126,6 +127,19 @@ public class NameNodeAdapter {
return namenode.getNamesystem().leaseManager.getLeaseByPath(path).getHolder();
}
/**
* @return the timestamp of the last renewal of the given lease,
* or -1 in the case that the lease doesn't exist.
*/
public static long getLeaseRenewalTime(NameNode nn, String path) {
LeaseManager lm = nn.getNamesystem().leaseManager;
Lease l = lm.getLeaseByPath(path);
if (l == null) {
return -1;
}
return l.getLastUpdate();
}
/**
* Return the datanode descriptor for the given datanode.
*/

View File

@ -24,15 +24,19 @@ import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.test.MultithreadedTestUtil.TestContext;
import org.apache.hadoop.test.MultithreadedTestUtil.RepeatingTestThread;
import org.apache.tools.ant.taskdefs.WaitFor;
import org.junit.Test;
import org.mockito.Mockito;
@ -45,6 +49,7 @@ public class TestHAStateTransitions {
TestStandbyIsHot.class);
private static final Path TEST_DIR = new Path("/test");
private static final Path TEST_FILE_PATH = new Path(TEST_DIR, "foo");
private static final String TEST_FILE_STR = TEST_FILE_PATH.toUri().getPath();
private static final String TEST_FILE_DATA =
"Hello state transitioning world";
@ -191,4 +196,59 @@ public class TestHAStateTransitions {
cluster.shutdown();
}
}
/**
* Test for HDFS-2812. Since lease renewals go from the client
* only to the active NN, the SBN will have out-of-date lease
* info when it becomes active. We need to make sure we don't
* accidentally mark the leases as expired when the failover
* proceeds.
*/
@Test(timeout=120000)
public void testLeasesRenewedOnTransition() throws Exception {
Configuration conf = new Configuration();
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
.nnTopology(MiniDFSNNTopology.simpleHATopology())
.numDataNodes(1)
.build();
FSDataOutputStream stm = null;
FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
NameNode nn0 = cluster.getNameNode(0);
NameNode nn1 = cluster.getNameNode(1);
nn1.getNamesystem().getEditLogTailer().setSleepTime(250);
nn1.getNamesystem().getEditLogTailer().interrupt();
try {
cluster.waitActive();
cluster.transitionToActive(0);
LOG.info("Starting with NN 0 active");
stm = fs.create(TEST_FILE_PATH);
long nn0t0 = NameNodeAdapter.getLeaseRenewalTime(nn0, TEST_FILE_STR);
assertTrue(nn0t0 > 0);
long nn1t0 = NameNodeAdapter.getLeaseRenewalTime(nn1, TEST_FILE_STR);
assertEquals("Lease should not yet exist on nn1",
-1, nn1t0);
Thread.sleep(5); // make sure time advances!
HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
long nn1t1 = NameNodeAdapter.getLeaseRenewalTime(nn1, TEST_FILE_STR);
assertTrue("Lease should have been created on standby. Time was: " +
nn1t1, nn1t1 > nn0t0);
Thread.sleep(5); // make sure time advances!
LOG.info("Failing over to NN 1");
cluster.transitionToStandby(0);
cluster.transitionToActive(1);
long nn1t2 = NameNodeAdapter.getLeaseRenewalTime(nn1, TEST_FILE_STR);
assertTrue("Lease should have been renewed by failover process",
nn1t2 > nn1t1);
} finally {
IOUtils.closeStream(stm);
cluster.shutdown();
}
}
}