HDFS-3597. SNN fails to start after DFS upgrade. Contributed by Andy Isaacson.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1363900 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
86273d0cf4
commit
75a419f15b
|
@ -362,6 +362,8 @@ Release 2.0.1-alpha - UNRELEASED
|
||||||
|
|
||||||
HDFS-3690. BlockPlacementPolicyDefault incorrectly casts LOG. (eli)
|
HDFS-3690. BlockPlacementPolicyDefault incorrectly casts LOG. (eli)
|
||||||
|
|
||||||
|
HDFS-3597. SNN fails to start after DFS upgrade. (Andy Isaacson via todd)
|
||||||
|
|
||||||
BREAKDOWN OF HDFS-3042 SUBTASKS
|
BREAKDOWN OF HDFS-3042 SUBTASKS
|
||||||
|
|
||||||
HDFS-2185. HDFS portion of ZK-based FailoverController (todd)
|
HDFS-2185. HDFS portion of ZK-based FailoverController (todd)
|
||||||
|
|
|
@ -113,12 +113,19 @@ public class CheckpointSignature extends StorageInfo
|
||||||
+ blockpoolID ;
|
+ blockpoolID ;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
boolean storageVersionMatches(StorageInfo si) throws IOException {
|
||||||
|
return (layoutVersion == si.layoutVersion) && (cTime == si.cTime);
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean isSameCluster(FSImage si) {
|
||||||
|
return namespaceID == si.getStorage().namespaceID &&
|
||||||
|
clusterID.equals(si.getClusterID()) &&
|
||||||
|
blockpoolID.equals(si.getBlockPoolID());
|
||||||
|
}
|
||||||
|
|
||||||
void validateStorageInfo(FSImage si) throws IOException {
|
void validateStorageInfo(FSImage si) throws IOException {
|
||||||
if(layoutVersion != si.getStorage().layoutVersion
|
if (!isSameCluster(si)
|
||||||
|| namespaceID != si.getStorage().namespaceID
|
|| !storageVersionMatches(si.getStorage())) {
|
||||||
|| cTime != si.getStorage().cTime
|
|
||||||
|| !clusterID.equals(si.getClusterID())
|
|
||||||
|| !blockpoolID.equals(si.getBlockPoolID())) {
|
|
||||||
throw new IOException("Inconsistent checkpoint fields.\n"
|
throw new IOException("Inconsistent checkpoint fields.\n"
|
||||||
+ "LV = " + layoutVersion + " namespaceID = " + namespaceID
|
+ "LV = " + layoutVersion + " namespaceID = " + namespaceID
|
||||||
+ " cTime = " + cTime
|
+ " cTime = " + cTime
|
||||||
|
|
|
@ -437,18 +437,16 @@ public class SecondaryNameNode implements Runnable {
|
||||||
// Returns a token that would be used to upload the merged image.
|
// Returns a token that would be used to upload the merged image.
|
||||||
CheckpointSignature sig = namenode.rollEditLog();
|
CheckpointSignature sig = namenode.rollEditLog();
|
||||||
|
|
||||||
// Make sure we're talking to the same NN!
|
if ((checkpointImage.getNamespaceID() == 0) ||
|
||||||
if (checkpointImage.getNamespaceID() != 0) {
|
(sig.isSameCluster(checkpointImage) &&
|
||||||
// If the image actually has some data, make sure we're talking
|
!sig.storageVersionMatches(checkpointImage.getStorage()))) {
|
||||||
// to the same NN as we did before.
|
// if we're a fresh 2NN, or if we're on the same cluster and our storage
|
||||||
sig.validateStorageInfo(checkpointImage);
|
// needs an upgrade, just take the storage info from the server.
|
||||||
} else {
|
|
||||||
// if we're a fresh 2NN, just take the storage info from the server
|
|
||||||
// we first talk to.
|
|
||||||
dstStorage.setStorageInfo(sig);
|
dstStorage.setStorageInfo(sig);
|
||||||
dstStorage.setClusterID(sig.getClusterID());
|
dstStorage.setClusterID(sig.getClusterID());
|
||||||
dstStorage.setBlockPoolID(sig.getBlockpoolID());
|
dstStorage.setBlockPoolID(sig.getBlockpoolID());
|
||||||
}
|
}
|
||||||
|
sig.validateStorageInfo(checkpointImage);
|
||||||
|
|
||||||
// error simulation code for junit test
|
// error simulation code for junit test
|
||||||
CheckpointFaultInjector.getInstance().afterSecondaryCallsRollEditLog();
|
CheckpointFaultInjector.getInstance().afterSecondaryCallsRollEditLog();
|
||||||
|
@ -703,7 +701,7 @@ public class SecondaryNameNode implements Runnable {
|
||||||
/**
|
/**
|
||||||
* Analyze checkpoint directories.
|
* Analyze checkpoint directories.
|
||||||
* Create directories if they do not exist.
|
* Create directories if they do not exist.
|
||||||
* Recover from an unsuccessful checkpoint is necessary.
|
* Recover from an unsuccessful checkpoint if necessary.
|
||||||
*
|
*
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -0,0 +1,116 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hdfs.server.namenode;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.After;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.fs.FileUtil;
|
||||||
|
|
||||||
|
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
||||||
|
import org.apache.hadoop.hdfs.HdfsConfiguration;
|
||||||
|
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||||
|
|
||||||
|
import java.util.Properties;
|
||||||
|
import java.io.FileReader;
|
||||||
|
import java.io.FileWriter;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.apache.hadoop.test.GenericTestUtils;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Regression test for HDFS-3597, SecondaryNameNode upgrade -- when a 2NN
|
||||||
|
* starts up with an existing directory structure with an old VERSION file, it
|
||||||
|
* should delete the snapshot and download a new one from the NN.
|
||||||
|
*/
|
||||||
|
public class TestSecondaryNameNodeUpgrade {
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void cleanupCluster() throws IOException {
|
||||||
|
File hdfsDir = new File(MiniDFSCluster.getBaseDirectory()).getCanonicalFile();
|
||||||
|
System.out.println("cleanupCluster deleting " + hdfsDir);
|
||||||
|
if (hdfsDir.exists() && !FileUtil.fullyDelete(hdfsDir)) {
|
||||||
|
throw new IOException("Could not delete hdfs directory '" + hdfsDir + "'");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void doIt(String param, String val) throws IOException {
|
||||||
|
MiniDFSCluster cluster = null;
|
||||||
|
FileSystem fs = null;
|
||||||
|
SecondaryNameNode snn = null;
|
||||||
|
|
||||||
|
try {
|
||||||
|
Configuration conf = new HdfsConfiguration();
|
||||||
|
|
||||||
|
cluster = new MiniDFSCluster.Builder(conf).build();
|
||||||
|
cluster.waitActive();
|
||||||
|
|
||||||
|
conf.set(DFSConfigKeys.DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY, "0.0.0.0:0");
|
||||||
|
snn = new SecondaryNameNode(conf);
|
||||||
|
|
||||||
|
fs = cluster.getFileSystem();
|
||||||
|
|
||||||
|
fs.mkdirs(new Path("/test/foo"));
|
||||||
|
|
||||||
|
snn.doCheckpoint();
|
||||||
|
|
||||||
|
List<File> versionFiles = snn.getFSImage().getStorage().getFiles(null, "VERSION");
|
||||||
|
|
||||||
|
snn.shutdown();
|
||||||
|
|
||||||
|
for (File versionFile : versionFiles) {
|
||||||
|
System.out.println("Changing '" + param + "' to '" + val + "' in " + versionFile);
|
||||||
|
FSImageTestUtil.corruptVersionFile(versionFile, param, val);
|
||||||
|
}
|
||||||
|
|
||||||
|
snn = new SecondaryNameNode(conf);
|
||||||
|
|
||||||
|
fs.mkdirs(new Path("/test/bar"));
|
||||||
|
|
||||||
|
snn.doCheckpoint();
|
||||||
|
} finally {
|
||||||
|
if (fs != null) fs.close();
|
||||||
|
if (cluster != null) cluster.shutdown();
|
||||||
|
if (snn != null) snn.shutdown();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUpgradeLayoutVersionSucceeds() throws IOException {
|
||||||
|
doIt("layoutVersion", "-39");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testChangeNsIDFails() throws IOException {
|
||||||
|
try {
|
||||||
|
doIt("namespaceID", "2");
|
||||||
|
Assert.fail("Should throw InconsistentFSStateException");
|
||||||
|
} catch(IOException e) {
|
||||||
|
GenericTestUtils.assertExceptionContains("Inconsistent checkpoint fields", e);
|
||||||
|
System.out.println("Correctly failed with inconsistent namespaceID: " + e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue