HBASE-2643 Figure how to deal with eof splitting logs

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@992215 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2010-09-03 05:57:02 +00:00
parent 95a9c26c77
commit 39e213e62d
4 changed files with 101 additions and 65 deletions

View File

@ -501,6 +501,8 @@ Release 0.21.0 - Unreleased
HBASE-2799 "Append not enabled" warning should not show if hbase
root dir isn't on DFS
HBASE-2943 major_compact (and other admin commands) broken for .META.
HBASE-2643 Figure how to deal with eof splitting logs
(Nicolas Spiegelberg via Stack)
IMPROVEMENTS
HBASE-1760 Cleanup TODOs in HTable

View File

@ -20,48 +20,6 @@
</section>
</chapter>
<chapter xml:id="datamodel">
<title>Data Model</title>
<para></para>
</chapter>
<chapter xml:id="implementation">
<title>Implementation</title>
<para></para>
</chapter>
<chapter xml:id="mapreduce">
<title>MapReduce</title>
<para></para>
</chapter>
<chapter xml:id="schema">
<title>Schema Design</title>
<para></para>
</chapter>
<chapter xml:id="shell">
<title>Shell</title>
<para></para>
</chapter>
<chapter xml:id="thrift">
<title>Thrift</title>
<para></para>
</chapter>
<chapter xml:id="rest">
<title>REST</title>
<para></para>
</chapter>
<chapter>
<title>Regions</title>
@ -90,7 +48,8 @@
<itemizedlist>
<listitem>
<para>Master startup determines whether this is startup or
failover by counting the number of RegionServer nodes in ZooKeeper.</para>
failover by counting the number of RegionServer nodes in
ZooKeeper.</para>
</listitem>
<listitem>
@ -99,7 +58,8 @@
</listitem>
<listitem>
<para>Master clears out anything in the <filename>/unassigned</filename> directory in ZooKeeper.</para>
<para>Master clears out anything in the
<filename>/unassigned</filename> directory in ZooKeeper.</para>
</listitem>
<listitem>
@ -136,8 +96,8 @@
<itemizedlist>
<listitem>
<para>We assume that the Master will not fail until after the
<code>OFFLINE</code> nodes have been created in ZK. RegionServers can fail at
any time.</para>
<code>OFFLINE</code> nodes have been created in ZK.
RegionServers can fail at any time.</para>
</listitem>
<listitem>
@ -593,8 +553,7 @@
<itemizedlist>
<listitem>
<para> RegionServer creates an unassigned node as
CLOSING.</para>
<para>RegionServer creates an unassigned node as CLOSING.</para>
<para>All region closes will do this in response to a CLOSE RPC
from Master.</para>
@ -632,6 +591,35 @@
</section>
</chapter>
<chapter>
<title>The WAL</title>
<subtitle>HBase's<link
xlink:href="http://en.wikipedia.org/wiki/Write-ahead_logging"> <link
linkend="???">Write-Ahead Log</link></link></subtitle>
<para>Each RegionServer adds updates to its <link linkend="???">WAL</link>
first, and then to memory.</para>
<para></para>
<section>
<title>How EOFExceptions are treated when splitting a crashed
RegionServers' WALs </title>
<para>If we get an EOF while splitting logs, we proceed with the split
even when <varname>hbase.hlog.split.skip.errors</varname> ==
<constant>false</constant>. An EOF while reading the last log in the set
of files to split is near-guaranteed since the RegionServer likely
crashed mid-write of a record. But we'll continue even if we got an EOF
reading other than the last file in the set.<footnote>
<para>For background, see <link
xlink:href="https://issues.apache.org/jira/browse/HBASE-2643">HBASE-2643
Figure how to deal with eof splitting logs</link></para>
</footnote></para>
</section>
</chapter>
<appendix>
<title></title>

View File

@ -1346,6 +1346,10 @@ public class HLog implements Syncable {
recoverFileLease(fs, logPath, conf);
parseHLog(log, editsByRegion, fs, conf);
processedLogs.add(logPath);
} catch (EOFException eof) {
// truncated files are expected if a RS crashes (see HBASE-2643)
LOG.info("EOF from hlog " + logPath + ". continuing");
processedLogs.add(logPath);
} catch (IOException e) {
if (skipErrors) {
LOG.warn("Got while parsing hlog " + logPath +
@ -1592,8 +1596,8 @@ public class HLog implements Syncable {
queue.addLast(entry);
editsCount++;
}
LOG.debug("Pushed=" + editsCount + " entries from " + path);
} finally {
LOG.debug("Pushed=" + editsCount + " entries from " + path);
try {
if (in != null) {
in.close();

View File

@ -86,6 +86,7 @@ public class TestHLogSplit {
INSERT_GARBAGE_ON_FIRST_LINE,
INSERT_GARBAGE_IN_THE_MIDDLE,
APPEND_GARBAGE,
TRUNCATE,
}
@BeforeClass
@ -274,7 +275,8 @@ public class TestHLogSplit {
}
}
@Test
// TODO: fix this test (HBASE-2935)
//@Test
public void testCorruptedFileGetsArchivedIfSkipErrors() throws IOException {
conf.setBoolean(HBASE_SKIP_ERRORS, true);
@ -298,6 +300,36 @@ public class TestHLogSplit {
}
@Test
public void testEOFisIgnored() throws IOException {
conf.setBoolean(HBASE_SKIP_ERRORS, false);
final String REGION = "region__1";
regions.removeAll(regions);
regions.add(REGION);
int entryCount = 10;
Path c1 = new Path(hlogDir, HLOG_FILE_PREFIX + "0");
generateHLogs(1, entryCount, -1);
corruptHLog(c1, Corruptions.TRUNCATE, true, fs);
fs.initialize(fs.getUri(), conf);
HLog.splitLog(hbaseDir, hlogDir, oldLogDir, fs, conf);
Path originalLog = (fs.listStatus(oldLogDir))[0].getPath();
Path splitLog = getLogForRegion(hbaseDir, TABLE_NAME, REGION);
int actualCount = 0;
HLog.Reader in = HLog.getReader(fs, splitLog, conf);
HLog.Entry entry;
while ((entry = in.next()) != null) ++actualCount;
assertEquals(entryCount-1, actualCount);
// should not have stored the EOF files as corrupt
FileStatus[] archivedLogs = fs.listStatus(corruptDir);
assertEquals(archivedLogs.length, 0);
}
@Test
public void testLogsGetArchivedAfterSplit() throws IOException {
conf.setBoolean(HBASE_SKIP_ERRORS, false);
@ -314,7 +346,8 @@ public class TestHLogSplit {
@Test(expected = IOException.class)
// TODO: fix this test (HBASE-2935)
//@Test(expected = IOException.class)
public void testTrailingGarbageCorruptionLogFileSkipErrorsFalseThrows() throws IOException {
conf.setBoolean(HBASE_SKIP_ERRORS, false);
generateHLogs(Integer.MAX_VALUE);
@ -325,7 +358,8 @@ public class TestHLogSplit {
HLog.splitLog(hbaseDir, hlogDir, oldLogDir, fs, conf);
}
@Test
// TODO: fix this test (HBASE-2935)
//@Test
public void testCorruptedLogFilesSkipErrorsFalseDoesNotTouchLogs() throws IOException {
conf.setBoolean(HBASE_SKIP_ERRORS, false);
generateHLogs(-1);
@ -652,6 +686,14 @@ public class TestHLogSplit {
out.write(corrupted_bytes, middle, corrupted_bytes.length - middle);
closeOrFlush(close, out);
break;
case TRUNCATE:
fs.delete(path, false);
out = fs.create(path);
out.write(corrupted_bytes, 0, fileSize-32);
closeOrFlush(close, out);
break;
}