From 9833468302bd2fa235d9d1f40517631f9dfff517 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Fri, 20 Jul 2012 18:58:58 +0000 Subject: [PATCH] HDFS-3597. SNN fails to start after DFS upgrade. Contributed by Andy Isaacson. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1363899 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 + .../server/namenode/CheckpointSignature.java | 17 ++- .../server/namenode/SecondaryNameNode.java | 16 ++- .../TestSecondaryNameNodeUpgrade.java | 116 ++++++++++++++++++ 4 files changed, 137 insertions(+), 14 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecondaryNameNodeUpgrade.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 7d108d9de1a..cc16ac35638 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -518,6 +518,8 @@ Branch-2 ( Unreleased changes ) HDFS-3690. BlockPlacementPolicyDefault incorrectly casts LOG. (eli) + HDFS-3597. SNN fails to start after DFS upgrade. (Andy Isaacson via todd) + BREAKDOWN OF HDFS-3042 SUBTASKS HDFS-2185. HDFS portion of ZK-based FailoverController (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointSignature.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointSignature.java index d723ede9bf1..8c4d79cc9bf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointSignature.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointSignature.java @@ -113,12 +113,19 @@ public String toString() { + blockpoolID ; } + boolean storageVersionMatches(StorageInfo si) throws IOException { + return (layoutVersion == si.layoutVersion) && (cTime == si.cTime); + } + + boolean isSameCluster(FSImage si) { + return namespaceID == si.getStorage().namespaceID && + clusterID.equals(si.getClusterID()) && + blockpoolID.equals(si.getBlockPoolID()); + } + void validateStorageInfo(FSImage si) throws IOException { - if(layoutVersion != si.getStorage().layoutVersion - || namespaceID != si.getStorage().namespaceID - || cTime != si.getStorage().cTime - || !clusterID.equals(si.getClusterID()) - || !blockpoolID.equals(si.getBlockPoolID())) { + if (!isSameCluster(si) + || !storageVersionMatches(si.getStorage())) { throw new IOException("Inconsistent checkpoint fields.\n" + "LV = " + layoutVersion + " namespaceID = " + namespaceID + " cTime = " + cTime diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java index c32bd3383b7..8057955dfac 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java @@ -437,18 +437,16 @@ boolean doCheckpoint() throws IOException { // Returns a token that would be used to upload the merged image. CheckpointSignature sig = namenode.rollEditLog(); - // Make sure we're talking to the same NN! - if (checkpointImage.getNamespaceID() != 0) { - // If the image actually has some data, make sure we're talking - // to the same NN as we did before. - sig.validateStorageInfo(checkpointImage); - } else { - // if we're a fresh 2NN, just take the storage info from the server - // we first talk to. + if ((checkpointImage.getNamespaceID() == 0) || + (sig.isSameCluster(checkpointImage) && + !sig.storageVersionMatches(checkpointImage.getStorage()))) { + // if we're a fresh 2NN, or if we're on the same cluster and our storage + // needs an upgrade, just take the storage info from the server. dstStorage.setStorageInfo(sig); dstStorage.setClusterID(sig.getClusterID()); dstStorage.setBlockPoolID(sig.getBlockpoolID()); } + sig.validateStorageInfo(checkpointImage); // error simulation code for junit test CheckpointFaultInjector.getInstance().afterSecondaryCallsRollEditLog(); @@ -703,7 +701,7 @@ static class CheckpointStorage extends FSImage { /** * Analyze checkpoint directories. * Create directories if they do not exist. - * Recover from an unsuccessful checkpoint is necessary. + * Recover from an unsuccessful checkpoint if necessary. * * @throws IOException */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecondaryNameNodeUpgrade.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecondaryNameNodeUpgrade.java new file mode 100644 index 00000000000..6119584c0d8 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecondaryNameNodeUpgrade.java @@ -0,0 +1,116 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode; + +import java.io.File; +import java.io.IOException; +import java.util.List; + +import org.junit.Test; +import org.junit.Before; +import org.junit.After; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.FileUtil; + +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.hdfs.MiniDFSCluster; + +import java.util.Properties; +import java.io.FileReader; +import java.io.FileWriter; +import org.junit.Assert; +import org.apache.hadoop.test.GenericTestUtils; + +/** + * Regression test for HDFS-3597, SecondaryNameNode upgrade -- when a 2NN + * starts up with an existing directory structure with an old VERSION file, it + * should delete the snapshot and download a new one from the NN. + */ +public class TestSecondaryNameNodeUpgrade { + + @Before + public void cleanupCluster() throws IOException { + File hdfsDir = new File(MiniDFSCluster.getBaseDirectory()).getCanonicalFile(); + System.out.println("cleanupCluster deleting " + hdfsDir); + if (hdfsDir.exists() && !FileUtil.fullyDelete(hdfsDir)) { + throw new IOException("Could not delete hdfs directory '" + hdfsDir + "'"); + } + } + + private void doIt(String param, String val) throws IOException { + MiniDFSCluster cluster = null; + FileSystem fs = null; + SecondaryNameNode snn = null; + + try { + Configuration conf = new HdfsConfiguration(); + + cluster = new MiniDFSCluster.Builder(conf).build(); + cluster.waitActive(); + + conf.set(DFSConfigKeys.DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY, "0.0.0.0:0"); + snn = new SecondaryNameNode(conf); + + fs = cluster.getFileSystem(); + + fs.mkdirs(new Path("/test/foo")); + + snn.doCheckpoint(); + + List versionFiles = snn.getFSImage().getStorage().getFiles(null, "VERSION"); + + snn.shutdown(); + + for (File versionFile : versionFiles) { + System.out.println("Changing '" + param + "' to '" + val + "' in " + versionFile); + FSImageTestUtil.corruptVersionFile(versionFile, param, val); + } + + snn = new SecondaryNameNode(conf); + + fs.mkdirs(new Path("/test/bar")); + + snn.doCheckpoint(); + } finally { + if (fs != null) fs.close(); + if (cluster != null) cluster.shutdown(); + if (snn != null) snn.shutdown(); + } + } + + @Test + public void testUpgradeLayoutVersionSucceeds() throws IOException { + doIt("layoutVersion", "-39"); + } + + @Test + public void testChangeNsIDFails() throws IOException { + try { + doIt("namespaceID", "2"); + Assert.fail("Should throw InconsistentFSStateException"); + } catch(IOException e) { + GenericTestUtils.assertExceptionContains("Inconsistent checkpoint fields", e); + System.out.println("Correctly failed with inconsistent namespaceID: " + e); + } + } +}