From 6722e26df16039e9c91d791ef58a585d4b63c938 Mon Sep 17 00:00:00 2001
From: Zhihong Yu
Date: Tue, 19 Mar 2013 22:17:53 +0000
Subject: [PATCH] HBASE-7878 recoverFileLease does not check return value of
 recoverLease (Ted Yu)

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1458546 13f79535-47bb-0310-9956-ffa450edef68
---
 .../hbase/regionserver/wal/HLogSplitter.java  | 16 +++++++--
 .../apache/hadoop/hbase/util/FSHDFSUtils.java | 35 ++++++++++++++-----
 .../hbase/regionserver/wal/TestHLog.java      | 30 +++++++++++-----
 .../hbase/regionserver/wal/TestHLogSplit.java |  9 +++--
 4 files changed, 69 insertions(+), 21 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogSplitter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogSplitter.java
index 5ec4a65e9a1..dc0edb7795c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogSplitter.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogSplitter.java
@@ -26,6 +26,7 @@ import java.lang.reflect.InvocationTargetException;
 import java.text.ParseException;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
@@ -322,10 +323,21 @@ public class HLogSplitter {
       }
       status.setStatus("Log splits complete. Checking for orphaned logs.");
 
-      if (fs.listStatus(srcDir).length > processedLogs.size()
+      FileStatus[] currFiles = fs.listStatus(srcDir);
+      if (currFiles.length > processedLogs.size()
           + corruptedLogs.size()) {
+        Set<Path> fileSet = new HashSet<Path>();
+        for (FileStatus fstat : currFiles) {
+          fileSet.add(fstat.getPath());
+        }
+        for (Path p : processedLogs) {
+          fileSet.remove(p);
+        }
+        for (Path p : corruptedLogs) {
+          fileSet.remove(p);
+        }
         throw new OrphanHLogAfterSplitException(
-            "Discovered orphan hlog after split. Maybe the "
+            "Discovered orphan hlog after split. " + fileSet.iterator().next() + " Maybe the "
             + "HRegionServer was not dead when we started");
       }
     } finally {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java
index 6ec8881fb06..f2fd24ca640 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java
@@ -54,6 +54,8 @@ public class FSHDFSUtils extends FSUtils{
    * in o.a.h.hdfs.protocol.HdfsConstants cause of HDFS-1620.
    */
   public static final long LEASE_SOFTLIMIT_PERIOD = 60 * 1000;
+
+  public static final String TEST_TRIGGER_DFS_APPEND = "hbase.test.trigger.dfs.append";
 
   @Override
   public void recoverFileLease(final FileSystem fs, final Path p, Configuration conf)
@@ -72,22 +74,37 @@ public class FSHDFSUtils extends FSUtils{
 
     // Trying recovery
     boolean recovered = false;
+    long recoveryTimeout = conf.getInt("hbase.lease.recovery.timeout", 300000);
+    // conf parameter passed from unit test, indicating whether fs.append() should be triggered
+    boolean triggerAppend = conf.getBoolean(TEST_TRIGGER_DFS_APPEND, false);
+    Exception ex = null;
     while (!recovered) {
       try {
         try {
           DistributedFileSystem dfs = (DistributedFileSystem) fs;
-          DistributedFileSystem.class.getMethod("recoverLease", new Class[] { Path.class }).invoke(
-              dfs, p);
-        } catch (InvocationTargetException ite) {
-          // function was properly called, but threw it's own exception
-          throw (IOException) ite.getCause();
+          if (triggerAppend) throw new IOException();
+          try {
+            recovered = (Boolean) DistributedFileSystem.class.getMethod(
+                "recoverLease", new Class[] { Path.class }).invoke(dfs, p);
+            if (!recovered) LOG.debug("recoverLease returned false");
+          } catch (InvocationTargetException ite) {
+            // function was properly called, but threw its own exception
+            throw (IOException) ite.getCause();
+          }
         } catch (Exception e) {
           LOG.debug("Failed fs.recoverLease invocation, " + e.toString() +
-            ", trying fs.append instead");
+              ", trying fs.append instead");
+          ex = e;
+        }
+        if (ex != null || System.currentTimeMillis() - startWaiting > recoveryTimeout) {
+          LOG.debug("trying fs.append for " + p + " with " + ex);
+          ex = null; // assume the following append() call would succeed
           FSDataOutputStream out = fs.append(p);
           out.close();
+          recovered = true;
+          LOG.debug("fs.append passed");
         }
-        recovered = true;
+        if (recovered) break;
       } catch (IOException e) {
         e = RemoteExceptionHandler.checkIOException(e);
         if (e instanceof AlreadyBeingCreatedException) {
@@ -111,9 +128,9 @@
       }
       try {
         Thread.sleep(1000);
-      } catch (InterruptedException ex) {
+      } catch (InterruptedException ie) {
         InterruptedIOException iioe = new InterruptedIOException();
-        iioe.initCause(ex);
+        iioe.initCause(ie);
         throw iioe;
       }
     }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLog.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLog.java
index 879649bfa10..398654c9469 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLog.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLog.java
@@ -39,6 +39,7 @@ import org.apache.hadoop.hbase.*;
 import org.apache.hadoop.hbase.regionserver.wal.HLog.Reader;
 import org.apache.hadoop.hbase.regionserver.wal.HLogUtil;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSHDFSUtils;
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.Coprocessor;
 import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
@@ -49,7 +50,6 @@ import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.protocol.FSConstants;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
 import org.apache.hadoop.hdfs.server.namenode.LeaseManager;
-import org.apache.hadoop.io.SequenceFile;
 import org.apache.log4j.Level;
 import org.junit.After;
 import org.junit.AfterClass;
@@ -79,7 +79,7 @@ public class TestHLog {
   private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
   private static Path hbaseDir;
   private static Path oldLogDir;
-
+
   @Before
   public void setUp() throws Exception {
@@ -99,6 +99,7 @@ public class TestHLog {
     // Make block sizes small.
     TEST_UTIL.getConfiguration().setInt("dfs.blocksize", 1024 * 1024);
     // needed for testAppendClose()
+    TEST_UTIL.getConfiguration().setBoolean("dfs.support.broken.append", true);
     TEST_UTIL.getConfiguration().setBoolean("dfs.support.append", true);
     // quicker heartbeat interval for faster DN death notification
     TEST_UTIL.getConfiguration().setInt("heartbeat.recheck.interval", 5000);
@@ -370,18 +371,30 @@ public class TestHLog {
     }
   }
 
-  // For this test to pass, requires:
-  // 1. HDFS-200 (append support)
-  // 2. HDFS-988 (SafeMode should freeze file operations
-  //              [FSNamesystem.nextGenerationStampForBlock])
-  // 3. HDFS-142 (on restart, maintain pendingCreates)
+  /*
+   * We pass different values to recoverFileLease() so that different code paths are covered
+   *
+   * For this test to pass, requires:
+   * 1. HDFS-200 (append support)
+   * 2. HDFS-988 (SafeMode should freeze file operations
+   *              [FSNamesystem.nextGenerationStampForBlock])
+   * 3. HDFS-142 (on restart, maintain pendingCreates)
+   */
   @Test
   public void testAppendClose() throws Exception {
+    testAppendClose(true);
+    testAppendClose(false);
+  }
+
+  /*
+   * @param triggerDirectAppend whether to trigger direct call of fs.append()
+   */
+  public void testAppendClose(final boolean triggerDirectAppend) throws Exception {
     byte [] tableName = Bytes.toBytes(getName());
     HRegionInfo regioninfo = new HRegionInfo(tableName,
         HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW, false);
-    HLog wal = HLogFactory.createHLog(fs, dir, "hlogdir",
+    HLog wal = HLogFactory.createHLog(fs, dir, "hlogdir" + triggerDirectAppend,
         "hlogdir_archive", conf);
     final int total = 20;
@@ -456,6 +469,7 @@
       public Exception exception = null;
       public void run() {
           try {
+            rlConf.setBoolean(FSHDFSUtils.TEST_TRIGGER_DFS_APPEND, triggerDirectAppend);
             FSUtils.getInstance(fs, rlConf)
               .recoverFileLease(recoveredFs, walPath, rlConf);
           } catch (IOException e) {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLogSplit.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLogSplit.java
index 957fbf962e5..9df97d0bfc2 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLogSplit.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLogSplit.java
@@ -137,6 +137,8 @@ public class TestHLogSplit {
     FSUtils.setRootDir(TEST_UTIL.getConfiguration(), HBASEDIR);
     TEST_UTIL.getConfiguration().setClass("hbase.regionserver.hlog.writer.impl",
         InstrumentedSequenceFileLogWriter.class, HLog.Writer.class);
+    TEST_UTIL.getConfiguration().setBoolean("dfs.support.broken.append", true);
+    TEST_UTIL.getConfiguration().setBoolean("dfs.support.append", true);
     // This is how you turn off shortcircuit read currently. TODO: Fix. Should read config.
     System.setProperty("hbase.tests.use.shortcircuit.reads", "false");
     // Create fake maping user to group and set it to the conf.
@@ -715,14 +717,17 @@ public class TestHLogSplit {
     fs.initialize(fs.getUri(), conf);
 
     Thread zombie = new ZombieNewLogWriterRegionServer(stop);
+    List<Path> splits = null;
     try {
       zombie.start();
       try {
         HLogSplitter logSplitter = HLogSplitter.createLogSplitter(conf,
             HBASEDIR, HLOGDIR, OLDLOGDIR, fs);
-        logSplitter.splitLog();
+        splits = logSplitter.splitLog();
       } catch (IOException ex) {/* expected */}
-      int logFilesNumber = fs.listStatus(HLOGDIR).length;
+      FileStatus[] files = fs.listStatus(HLOGDIR);
+      if (files == null) fail("no files in " + HLOGDIR + " with splits " + splits);
+      int logFilesNumber = files.length;
 
       assertEquals("Log files should not be archived if there's an extra file after split",
                    NUM_WRITERS + 1, logFilesNumber);
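
Note for readers following the FSHDFSUtils.java hunk above: the recovery loop this patch introduces can be summarized in a standalone sketch. This is an illustration only, not the committed code -- the class name LeaseRecoverySketch is hypothetical, and the handling of AlreadyBeingCreatedException and the lease soft-limit wait present in the real method is elided; only the recoverLease-then-append control flow is shown.

import java.io.IOException;
import java.io.InterruptedIOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;

// Hypothetical sketch of the control flow of FSHDFSUtils.recoverFileLease() after this patch.
public class LeaseRecoverySketch {
  public static void recoverFileLease(FileSystem fs, Path p, Configuration conf)
      throws IOException {
    if (!(fs instanceof DistributedFileSystem)) return; // lease recovery is HDFS-only
    DistributedFileSystem dfs = (DistributedFileSystem) fs;
    long startWaiting = System.currentTimeMillis();
    long recoveryTimeout = conf.getInt("hbase.lease.recovery.timeout", 300000);
    boolean recovered = false;
    while (!recovered) {
      Exception ex = null;
      try {
        // Preferred path: invoke recoverLease() reflectively (the method may not exist
        // on older Hadoop versions) and, crucially, check its boolean return value.
        Method m = DistributedFileSystem.class.getMethod("recoverLease", Path.class);
        recovered = (Boolean) m.invoke(dfs, p);
      } catch (InvocationTargetException ite) {
        ex = (Exception) ite.getCause(); // recoverLease() itself threw
      } catch (Exception e) {
        ex = e;                          // e.g. NoSuchMethodException
      }
      // Fallback: if recoverLease() failed outright, or kept returning false past the
      // timeout, force recovery by opening the file for append and closing it.
      if (!recovered
          && (ex != null || System.currentTimeMillis() - startWaiting > recoveryTimeout)) {
        try {
          fs.append(p).close();          // succeeds only once the lease is actually free
          recovered = true;
        } catch (IOException ioe) {
          // Lease still held; fall through and retry (real code inspects the cause here).
        }
      }
      if (!recovered) {
        try {
          Thread.sleep(1000);            // pause before the next recoverLease() attempt
        } catch (InterruptedException ie) {
          InterruptedIOException iioe = new InterruptedIOException();
          iioe.initCause(ie);
          throw iioe;
        }
      }
    }
  }
}

The substance of the fix is visible in the loop: recoverLease()'s boolean return value is now consulted instead of being discarded, and fs.append() serves only as a fallback when recoverLease() throws or keeps returning false beyond hbase.lease.recovery.timeout.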