HDFS-3835. Long-lived 2NN cannot perform a checkpoint if security is enabled and the NN restarts with outstanding delegation tokens. Contributed by Aaron T. Myers.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1376189 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Aaron Myers 2012-08-22 18:48:32 +00:00
parent 8c28ebabcd
commit 4e75deb4e7
6 changed files with 61 additions and 2 deletions

View File

@ -113,6 +113,16 @@ extends AbstractDelegationTokenIdentifier>
} }
} }
/**
 * Returns this secret manager to a blank state: empties {@code allKeys} and
 * {@code currentTokens}, and sets both {@code currentId} and
 * {@code delegationTokenSequenceNumber} back to zero.
 */
public synchronized void reset() {
  // Drop the collections first, then rewind the counters. The whole method
  // runs under this object's monitor, so the ordering is not observable by
  // other synchronized methods of this instance.
  allKeys.clear();
  currentTokens.clear();
  currentId = 0;
  delegationTokenSequenceNumber = 0;
}
/** /**
* Add a previously used master key to cache (when NN restarts), * Add a previously used master key to cache (when NN restarts),
* should be called before activate(). * should be called before activate().
@ -190,7 +200,6 @@ extends AbstractDelegationTokenIdentifier>
@Override @Override
protected synchronized byte[] createPassword(TokenIdent identifier) { protected synchronized byte[] createPassword(TokenIdent identifier) {
LOG.info("Creating password for identifier: "+identifier);
int sequenceNum; int sequenceNum;
long now = Time.now(); long now = Time.now();
sequenceNum = ++delegationTokenSequenceNumber; sequenceNum = ++delegationTokenSequenceNumber;
@ -198,6 +207,7 @@ extends AbstractDelegationTokenIdentifier>
identifier.setMaxDate(now + tokenMaxLifetime); identifier.setMaxDate(now + tokenMaxLifetime);
identifier.setMasterKeyId(currentId); identifier.setMasterKeyId(currentId);
identifier.setSequenceNumber(sequenceNum); identifier.setSequenceNumber(sequenceNum);
LOG.info("Creating password for identifier: " + identifier);
byte[] password = createPassword(identifier.getBytes(), currentKey.getKey()); byte[] password = createPassword(identifier.getBytes(), currentKey.getKey());
currentTokens.put(identifier, new DelegationTokenInformation(now currentTokens.put(identifier, new DelegationTokenInformation(now
+ tokenRenewInterval, password)); + tokenRenewInterval, password));

View File

@ -491,6 +491,9 @@ Release 2.0.1-alpha - UNRELEASED
HDFS-3837. Fix DataNode.recoverBlock findbugs warning. (eli) HDFS-3837. Fix DataNode.recoverBlock findbugs warning. (eli)
HDFS-3835. Long-lived 2NN cannot perform a checkpoint if security is
enabled and the NN restarts with outstanding delegation tokens. (atm)
BREAKDOWN OF HDFS-3042 SUBTASKS BREAKDOWN OF HDFS-3042 SUBTASKS
HDFS-2185. HDFS portion of ZK-based FailoverController (todd) HDFS-2185. HDFS portion of ZK-based FailoverController (todd)

View File

@ -536,6 +536,7 @@ public class FSImage implements Closeable {
*/ */
void reloadFromImageFile(File file, FSNamesystem target) throws IOException { void reloadFromImageFile(File file, FSNamesystem target) throws IOException {
target.dir.reset(); target.dir.reset();
target.dtSecretManager.reset();
LOG.debug("Reloading namespace from " + file); LOG.debug("Reloading namespace from " + file);
loadFSImage(file, target, null); loadFSImage(file, target, null);

View File

@ -283,7 +283,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
// Scan interval is not configurable. // Scan interval is not configurable.
private static final long DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL = private static final long DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL =
TimeUnit.MILLISECONDS.convert(1, TimeUnit.HOURS); TimeUnit.MILLISECONDS.convert(1, TimeUnit.HOURS);
private final DelegationTokenSecretManager dtSecretManager; final DelegationTokenSecretManager dtSecretManager;
private final boolean alwaysUseDelegationTokensForTests; private final boolean alwaysUseDelegationTokensForTests;

View File

@ -376,6 +376,7 @@ public class SecondaryNameNode implements Runnable {
downloadImage = false; downloadImage = false;
LOG.info("Image has not changed. Will not download image."); LOG.info("Image has not changed. Will not download image.");
} else { } else {
LOG.info("Image has changed. Downloading updated image from NN.");
MD5Hash downloadedHash = TransferFsImage.downloadImageToStorage( MD5Hash downloadedHash = TransferFsImage.downloadImageToStorage(
nnHostPort, sig.mostRecentCheckpointTxId, dstImage.getStorage(), true); nnHostPort, sig.mostRecentCheckpointTxId, dstImage.getStorage(), true);
dstImage.saveDigestAndRenameCheckpointImage( dstImage.saveDigestAndRenameCheckpointImage(

View File

@ -67,6 +67,7 @@ import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog; import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog;
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest; import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest;
import org.apache.hadoop.hdfs.tools.DFSAdmin; import org.apache.hadoop.hdfs.tools.DFSAdmin;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.test.GenericTestUtils.DelayAnswer; import org.apache.hadoop.test.GenericTestUtils.DelayAnswer;
@ -1835,6 +1836,49 @@ public class TestCheckpoint {
} }
} }
/**
 * Regression test for HDFS-3835 - "Long-lived 2NN cannot perform a
 * checkpoint if security is enabled and the NN restarts with outstanding
 * delegation tokens".
 *
 * Scenario: issue a delegation token on the NN, checkpoint once with the 2NN,
 * force the NN to write a new fsimage via saveNamespace, then checkpoint
 * again. The second checkpoint makes the 2NN download and reload the image
 * while it already holds delegation token state from the first one.
 *
 * @throws IOException if starting the cluster or the 2NN, or either
 *         checkpoint, fails
 */
@Test
public void testSecondaryNameNodeWithDelegationTokens() throws IOException {
  MiniDFSCluster cluster = null;
  SecondaryNameNode secondary = null;

  Configuration conf = new HdfsConfiguration();
  conf.setBoolean(
      DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true);
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDatanodes)
        .format(true).build();

    assertNotNull(cluster.getNamesystem().getDelegationToken(new Text("atm")));

    secondary = startSecondaryNameNode(conf);

    // Checkpoint once, so the 2NN loads the DT into its in-memory state.
    secondary.doCheckpoint();

    // Perform a saveNamespace, so that the NN has a new fsimage, and the 2NN
    // therefore needs to download a new fsimage the next time it performs a
    // checkpoint.
    cluster.getNameNodeRpc().setSafeMode(SafeModeAction.SAFEMODE_ENTER);
    cluster.getNameNodeRpc().saveNamespace();
    cluster.getNameNodeRpc().setSafeMode(SafeModeAction.SAFEMODE_LEAVE);

    // Ensure that the 2NN can still perform a checkpoint.
    secondary.doCheckpoint();
  } finally {
    // Nest the cleanup so the mini cluster is always shut down, even if
    // stopping the 2NN throws; otherwise the cluster would be left running.
    try {
      if (secondary != null) {
        secondary.shutdown();
      }
    } finally {
      if (cluster != null) {
        cluster.shutdown();
      }
    }
  }
}
@Test @Test
public void testCommandLineParsing() throws ParseException { public void testCommandLineParsing() throws ParseException {
SecondaryNameNode.CommandLineOpts opts = SecondaryNameNode.CommandLineOpts opts =