HDFS-4120. Add a new "-skipSharedEditsCheck" option for BootstrapStandby ( Contributed by Liang Xie and Rakesh R )

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1611562 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Vinayakumar B 2014-07-18 08:48:52 +00:00
parent 8f0c341934
commit 9f75b97a09
3 changed files with 177 additions and 2 deletions

View File

@ -295,6 +295,9 @@ Release 2.6.0 - UNRELEASED
HDFS-6655. Add 'header banner' to 'explorer.html' also in Namenode UI
(vinayakumarb)
HDFS-4120. Add a new "-skipSharedEditsCheck" option for BootstrapStandby
(Liang Xie and Rakesh R via vinayakumarb)
OPTIMIZATIONS
HDFS-6690. Deduplicate xattr names in memory. (wang)

View File

@ -0,0 +1,169 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.contrib.bkjournal;
import java.io.File;
import java.io.FileFilter;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
import org.apache.hadoop.hdfs.server.namenode.ha.BootstrapStandby;
import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil;
import org.apache.hadoop.hdfs.server.namenode.ha.TestStandbyCheckpoints.SlowCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import com.google.common.collect.ImmutableList;
/**
 * Exercises {@link BootstrapStandby} on a two-NameNode HA mini cluster whose
 * shared edits directory is a BookKeeper journal URI, both with and without
 * the "-skipSharedEditsCheck" option.
 */
public class TestBootstrapStandbyWithBKJM {
  /** BookKeeper test helper shared by every test in this class. */
  private static BKJMUtil bkutil;
  /** Mini cluster rebuilt before each test and shut down after it. */
  protected MiniDFSCluster cluster;

  /**
   * Starts the shared BookKeeper helper once for the whole class.
   */
  @BeforeClass
  public static void setupBookkeeper() throws Exception {
    // presumably 3 is the number of bookies to start -- confirm against BKJMUtil
    bkutil = new BKJMUtil(3);
    bkutil.start();
  }

  /**
   * Tears down the shared BookKeeper helper after all tests have run.
   */
  @AfterClass
  public static void teardownBookkeeper() throws Exception {
    bkutil.teardown();
  }

  /**
   * Shuts down the mini cluster if {@link #setUp()} managed to create one.
   */
  @After
  public void teardown() {
    if (cluster != null) {
      cluster.shutdown();
    }
  }

  /**
   * Builds a one-DataNode cluster with a single nameservice "ns1" holding two
   * NameNodes ("nn1" on HTTP port 10001, "nn2" on HTTP port 10002), using a
   * BookKeeper journal as the shared edits directory, and waits for it to
   * become active.
   */
  @Before
  public void setUp() throws Exception {
    Configuration conf = new Configuration();
    // Small checkpoint/tailing values so checkpoints trigger quickly.
    conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_KEY, 1);
    conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 5);
    conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
    // Point the shared edits directory at the BookKeeper journal.
    conf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY, BKJMUtil
        .createJournalURI("/bootstrapStandby").toString());
    BKJMUtil.addJournalManagerDefinition(conf);
    // Compress the image with the SlowCodec test codec.
    conf.setBoolean(DFSConfigKeys.DFS_IMAGE_COMPRESS_KEY, true);
    conf.set(DFSConfigKeys.DFS_IMAGE_COMPRESSION_CODEC_KEY,
        SlowCodec.class.getCanonicalName());
    CompressionCodecFactory.setCodecClasses(conf,
        ImmutableList.<Class> of(SlowCodec.class));
    MiniDFSNNTopology topology = new MiniDFSNNTopology()
        .addNameservice(new MiniDFSNNTopology.NSConf("ns1").addNN(
            new MiniDFSNNTopology.NNConf("nn1").setHttpPort(10001)).addNN(
            new MiniDFSNNTopology.NNConf("nn2").setHttpPort(10002)));
    cluster = new MiniDFSCluster.Builder(conf).nnTopology(topology)
        .numDataNodes(1).manageNameDfsSharedDirs(false).build();
    cluster.waitActive();
  }

  /**
   * While bootstrapping, in_progress transaction entries should be skipped.
   * Bootstrap usage for BKJM : "-force", "-nonInteractive", "-skipSharedEditsCheck"
   */
  @Test
  public void testBootstrapStandbyWithActiveNN() throws Exception {
    // make nn0 active
    cluster.transitionToActive(0);

    // do ops and generate in-progress edit log data
    Configuration confNN1 = cluster.getConfiguration(1);
    DistributedFileSystem dfs = (DistributedFileSystem) HATestUtil
        .configureFailoverFs(cluster, confNN1);
    for (int i = 1; i <= 10; i++) {
      dfs.mkdirs(new Path("/test" + i));
    }
    dfs.close();

    // shutdown nn1 and delete its edit log files
    cluster.shutdownNameNode(1);
    deleteEditLogIfExists(confNN1);

    // Save a fresh namespace image on nn0 (requires safe mode).
    cluster.getNameNodeRpc(0).setSafeMode(SafeModeAction.SAFEMODE_ENTER, true);
    cluster.getNameNodeRpc(0).saveNamespace();
    cluster.getNameNodeRpc(0).setSafeMode(SafeModeAction.SAFEMODE_LEAVE, true);

    // check without -skipSharedEditsCheck, Bootstrap should fail for BKJM
    // immediately after saveNamespace
    // NOTE(review): 6 is presumably BootstrapStandby's "logs unavailable"
    // return code -- confirm against the constant.
    int rc = BootstrapStandby.run(new String[] { "-force", "-nonInteractive" },
        confNN1);
    Assert.assertEquals("Mismatches return code", 6, rc);

    // check with -skipSharedEditsCheck
    rc = BootstrapStandby.run(new String[] { "-force", "-nonInteractive",
        "-skipSharedEditsCheck" }, confNN1);
    Assert.assertEquals("Mismatches return code", 0, rc);

    // Checkpoint as fast as we can, in a tight loop.
    confNN1.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, 1);
    cluster.restartNameNode(1);
    cluster.transitionToStandby(1);

    NameNode nn0 = cluster.getNameNode(0);
    HATestUtil.waitForStandbyToCatchUp(nn0, cluster.getNameNode(1));
    long expectedCheckpointTxId = NameNodeAdapter.getNamesystem(nn0)
        .getFSImage().getMostRecentCheckpointTxId();
    HATestUtil.waitForCheckpoint(cluster, 1,
        ImmutableList.of((int) expectedCheckpointTxId));

    // Should have copied over the namespace
    FSImageTestUtil.assertNNHasCheckpoints(cluster, 1,
        ImmutableList.of((int) expectedCheckpointTxId));
    FSImageTestUtil.assertNNFilesMatch(cluster);
  }

  /**
   * Deletes every file whose name starts with "edits" from the "current"
   * sub-directory of each directory listed (comma separated) under
   * {@link DFSConfigKeys#DFS_NAMENODE_EDITS_DIR_KEY}. Directories that do not
   * exist or cannot be listed are skipped.
   *
   * @param confNN1 configuration of the NameNode whose edit logs are removed
   */
  private void deleteEditLogIfExists(Configuration confNN1) {
    String editDirs = confNN1.get(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY);
    // StringUtils.split(null, ...) returns null, so fail with a clear
    // assertion rather than a NullPointerException on the length check below.
    Assert.assertNotNull("Wrong edit directory path!", editDirs);
    String[] listEditDirs = StringUtils.split(editDirs, ',');
    Assert.assertTrue("Wrong edit directory path!", listEditDirs.length > 0);

    for (String dir : listEditDirs) {
      // NOTE(review): each entry is handed to File as-is; if the configuration
      // holds URIs (e.g. "file:/...") rather than plain paths, the directory
      // will not resolve and nothing is deleted -- confirm.
      File curDir = new File(dir, "current");
      File[] editFiles = curDir.listFiles(new FileFilter() {
        @Override
        public boolean accept(File f) {
          // Select only the edit log files; everything else must be kept.
          return f.getName().startsWith("edits");
        }
      });
      if (editFiles == null) {
        // Not a directory, or an I/O error while listing: nothing to delete.
        continue;
      }
      for (File file : editFiles) {
        Assert.assertTrue("Failed to delete edit files!", file.delete());
      }
    }
  }
}

View File

@ -81,6 +81,7 @@ public class BootstrapStandby implements Tool, Configurable {
private boolean force = false;
private boolean interactive = true;
private boolean skipSharedEditsCheck = false;
// Exit/return codes.
static final int ERR_CODE_FAILED_CONNECT = 2;
@ -117,6 +118,8 @@ private void parseArgs(String[] args) {
force = true;
} else if ("-nonInteractive".equals(arg)) {
interactive = false;
} else if ("-skipSharedEditsCheck".equals(arg)) {
skipSharedEditsCheck = true;
} else {
printUsage();
throw new HadoopIllegalArgumentException(
@ -127,7 +130,7 @@ private void parseArgs(String[] args) {
private void printUsage() {
System.err.println("Usage: " + this.getClass().getSimpleName() +
"[-force] [-nonInteractive]"); " [-force] [-nonInteractive] [-skipSharedEditsCheck]");
}
private NamenodeProtocol createNNProtocolProxy()
@ -200,7 +203,7 @@ private int doRun() throws IOException {
// Ensure that we have enough edits already in the shared directory to
// start up from the last checkpoint on the active.
if (!checkLogsAvailableForRead(image, imageTxId, curTxId)) { if (!skipSharedEditsCheck && !checkLogsAvailableForRead(image, imageTxId, curTxId)) {
return ERR_CODE_LOGS_UNAVAILABLE;
}