HBASE-7878 recoverFileLease does not check return value of recoverLease (Ted Yu)

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1458546 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Zhihong Yu 2013-03-19 22:17:53 +00:00
parent 76788fd5bf
commit 6722e26df1
4 changed files with 69 additions and 21 deletions

View File

@ -26,6 +26,7 @@ import java.lang.reflect.InvocationTargetException;
import java.text.ParseException; import java.text.ParseException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@ -322,10 +323,21 @@ public class HLogSplitter {
} }
status.setStatus("Log splits complete. Checking for orphaned logs."); status.setStatus("Log splits complete. Checking for orphaned logs.");
if (fs.listStatus(srcDir).length > processedLogs.size() FileStatus[] currFiles = fs.listStatus(srcDir);
if (currFiles.length > processedLogs.size()
+ corruptedLogs.size()) { + corruptedLogs.size()) {
Set<Path> fileSet = new HashSet<Path>();
for (FileStatus fstat : currFiles) {
fileSet.add(fstat.getPath());
}
for (Path p : processedLogs) {
fileSet.remove(p);
}
for (Path p : corruptedLogs) {
fileSet.remove(p);
}
throw new OrphanHLogAfterSplitException( throw new OrphanHLogAfterSplitException(
"Discovered orphan hlog after split. Maybe the " "Discovered orphan hlog after split. " + fileSet.iterator().next() + " Maybe the "
+ "HRegionServer was not dead when we started"); + "HRegionServer was not dead when we started");
} }
} finally { } finally {

View File

@ -55,6 +55,8 @@ public class FSHDFSUtils extends FSUtils{
*/ */
public static final long LEASE_SOFTLIMIT_PERIOD = 60 * 1000; public static final long LEASE_SOFTLIMIT_PERIOD = 60 * 1000;
public static final String TEST_TRIGGER_DFS_APPEND = "hbase.test.trigger.dfs.append";
@Override @Override
public void recoverFileLease(final FileSystem fs, final Path p, Configuration conf) public void recoverFileLease(final FileSystem fs, final Path p, Configuration conf)
throws IOException{ throws IOException{
@ -72,22 +74,37 @@ public class FSHDFSUtils extends FSUtils{
// Trying recovery // Trying recovery
boolean recovered = false; boolean recovered = false;
long recoveryTimeout = conf.getInt("hbase.lease.recovery.timeout", 300000);
// conf parameter passed from unit test, indicating whether fs.append() should be triggered
boolean triggerAppend = conf.getBoolean(TEST_TRIGGER_DFS_APPEND, false);
Exception ex = null;
while (!recovered) { while (!recovered) {
try { try {
try { try {
DistributedFileSystem dfs = (DistributedFileSystem) fs; DistributedFileSystem dfs = (DistributedFileSystem) fs;
DistributedFileSystem.class.getMethod("recoverLease", new Class[] { Path.class }).invoke( if (triggerAppend) throw new IOException();
dfs, p); try {
} catch (InvocationTargetException ite) { recovered = (Boolean) DistributedFileSystem.class.getMethod(
// function was properly called, but threw it's own exception "recoverLease", new Class[] { Path.class }).invoke(dfs, p);
throw (IOException) ite.getCause(); if (!recovered) LOG.debug("recoverLease returned false");
} catch (InvocationTargetException ite) {
// function was properly called, but threw it's own exception
throw (IOException) ite.getCause();
}
} catch (Exception e) { } catch (Exception e) {
LOG.debug("Failed fs.recoverLease invocation, " + e.toString() + LOG.debug("Failed fs.recoverLease invocation, " + e.toString() +
", trying fs.append instead"); ", trying fs.append instead");
ex = e;
}
if (ex != null || System.currentTimeMillis() - startWaiting > recoveryTimeout) {
LOG.debug("trying fs.append for " + p + " with " + ex);
ex = null; // assume the following append() call would succeed
FSDataOutputStream out = fs.append(p); FSDataOutputStream out = fs.append(p);
out.close(); out.close();
recovered = true;
LOG.debug("fs.append passed");
} }
recovered = true; if (recovered) break;
} catch (IOException e) { } catch (IOException e) {
e = RemoteExceptionHandler.checkIOException(e); e = RemoteExceptionHandler.checkIOException(e);
if (e instanceof AlreadyBeingCreatedException) { if (e instanceof AlreadyBeingCreatedException) {
@ -111,9 +128,9 @@ public class FSHDFSUtils extends FSUtils{
} }
try { try {
Thread.sleep(1000); Thread.sleep(1000);
} catch (InterruptedException ex) { } catch (InterruptedException ie) {
InterruptedIOException iioe = new InterruptedIOException(); InterruptedIOException iioe = new InterruptedIOException();
iioe.initCause(ex); iioe.initCause(ie);
throw iioe; throw iioe;
} }
} }

View File

@ -39,6 +39,7 @@ import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.regionserver.wal.HLog.Reader; import org.apache.hadoop.hbase.regionserver.wal.HLog.Reader;
import org.apache.hadoop.hbase.regionserver.wal.HLogUtil; import org.apache.hadoop.hbase.regionserver.wal.HLogUtil;
import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSHDFSUtils;
import org.apache.hadoop.hbase.util.FSUtils; import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.Coprocessor; import org.apache.hadoop.hbase.Coprocessor;
import org.apache.hadoop.hbase.coprocessor.CoprocessorHost; import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
@ -49,7 +50,6 @@ import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.FSConstants; import org.apache.hadoop.hdfs.protocol.FSConstants;
import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.namenode.LeaseManager; import org.apache.hadoop.hdfs.server.namenode.LeaseManager;
import org.apache.hadoop.io.SequenceFile;
import org.apache.log4j.Level; import org.apache.log4j.Level;
import org.junit.After; import org.junit.After;
import org.junit.AfterClass; import org.junit.AfterClass;
@ -99,6 +99,7 @@ public class TestHLog {
// Make block sizes small. // Make block sizes small.
TEST_UTIL.getConfiguration().setInt("dfs.blocksize", 1024 * 1024); TEST_UTIL.getConfiguration().setInt("dfs.blocksize", 1024 * 1024);
// needed for testAppendClose() // needed for testAppendClose()
TEST_UTIL.getConfiguration().setBoolean("dfs.support.broken.append", true);
TEST_UTIL.getConfiguration().setBoolean("dfs.support.append", true); TEST_UTIL.getConfiguration().setBoolean("dfs.support.append", true);
// quicker heartbeat interval for faster DN death notification // quicker heartbeat interval for faster DN death notification
TEST_UTIL.getConfiguration().setInt("heartbeat.recheck.interval", 5000); TEST_UTIL.getConfiguration().setInt("heartbeat.recheck.interval", 5000);
@ -370,18 +371,30 @@ public class TestHLog {
} }
} }
// For this test to pass, requires: /*
// 1. HDFS-200 (append support) * We pass different values to recoverFileLease() so that different code paths are covered
// 2. HDFS-988 (SafeMode should freeze file operations *
// [FSNamesystem.nextGenerationStampForBlock]) * For this test to pass, requires:
// 3. HDFS-142 (on restart, maintain pendingCreates) * 1. HDFS-200 (append support)
* 2. HDFS-988 (SafeMode should freeze file operations
* [FSNamesystem.nextGenerationStampForBlock])
* 3. HDFS-142 (on restart, maintain pendingCreates)
*/
@Test @Test
public void testAppendClose() throws Exception { public void testAppendClose() throws Exception {
testAppendClose(true);
testAppendClose(false);
}
/*
* @param triggerDirectAppend whether to trigger direct call of fs.append()
*/
public void testAppendClose(final boolean triggerDirectAppend) throws Exception {
byte [] tableName = Bytes.toBytes(getName()); byte [] tableName = Bytes.toBytes(getName());
HRegionInfo regioninfo = new HRegionInfo(tableName, HRegionInfo regioninfo = new HRegionInfo(tableName,
HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW, false); HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW, false);
HLog wal = HLogFactory.createHLog(fs, dir, "hlogdir", HLog wal = HLogFactory.createHLog(fs, dir, "hlogdir" + triggerDirectAppend,
"hlogdir_archive", conf); "hlogdir_archive", conf);
final int total = 20; final int total = 20;
@ -456,6 +469,7 @@ public class TestHLog {
public Exception exception = null; public Exception exception = null;
public void run() { public void run() {
try { try {
rlConf.setBoolean(FSHDFSUtils.TEST_TRIGGER_DFS_APPEND, triggerDirectAppend);
FSUtils.getInstance(fs, rlConf) FSUtils.getInstance(fs, rlConf)
.recoverFileLease(recoveredFs, walPath, rlConf); .recoverFileLease(recoveredFs, walPath, rlConf);
} catch (IOException e) { } catch (IOException e) {

View File

@ -137,6 +137,8 @@ public class TestHLogSplit {
FSUtils.setRootDir(TEST_UTIL.getConfiguration(), HBASEDIR); FSUtils.setRootDir(TEST_UTIL.getConfiguration(), HBASEDIR);
TEST_UTIL.getConfiguration().setClass("hbase.regionserver.hlog.writer.impl", TEST_UTIL.getConfiguration().setClass("hbase.regionserver.hlog.writer.impl",
InstrumentedSequenceFileLogWriter.class, HLog.Writer.class); InstrumentedSequenceFileLogWriter.class, HLog.Writer.class);
TEST_UTIL.getConfiguration().setBoolean("dfs.support.broken.append", true);
TEST_UTIL.getConfiguration().setBoolean("dfs.support.append", true);
// This is how you turn off shortcircuit read currently. TODO: Fix. Should read config. // This is how you turn off shortcircuit read currently. TODO: Fix. Should read config.
System.setProperty("hbase.tests.use.shortcircuit.reads", "false"); System.setProperty("hbase.tests.use.shortcircuit.reads", "false");
// Create fake maping user to group and set it to the conf. // Create fake maping user to group and set it to the conf.
@ -715,14 +717,17 @@ public class TestHLogSplit {
fs.initialize(fs.getUri(), conf); fs.initialize(fs.getUri(), conf);
Thread zombie = new ZombieNewLogWriterRegionServer(stop); Thread zombie = new ZombieNewLogWriterRegionServer(stop);
List<Path> splits = null;
try { try {
zombie.start(); zombie.start();
try { try {
HLogSplitter logSplitter = HLogSplitter.createLogSplitter(conf, HLogSplitter logSplitter = HLogSplitter.createLogSplitter(conf,
HBASEDIR, HLOGDIR, OLDLOGDIR, fs); HBASEDIR, HLOGDIR, OLDLOGDIR, fs);
logSplitter.splitLog(); splits = logSplitter.splitLog();
} catch (IOException ex) {/* expected */} } catch (IOException ex) {/* expected */}
int logFilesNumber = fs.listStatus(HLOGDIR).length; FileStatus[] files = fs.listStatus(HLOGDIR);
if (files == null) fail("no files in " + HLOGDIR + " with splits " + splits);
int logFilesNumber = files.length;
assertEquals("Log files should not be archived if there's an extra file after split", assertEquals("Log files should not be archived if there's an extra file after split",
NUM_WRITERS + 1, logFilesNumber); NUM_WRITERS + 1, logFilesNumber);