HBASE-4862 Splitting hlog and opening region concurrently may cause data loss

(Chunhui Shen)


git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1206965 13f79535-47bb-0310-9956-ffa450edef68
Zhihong Yu 2011-11-28 02:35:35 +00:00
parent 2d2e75a7fb
commit b1159115a4
4 changed files with 144 additions and 8 deletions
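
The race being closed here: while the hlog splitter is still writing a recovered-edits file for a region, the region may be opened concurrently, and its replayRecoveredEditsIfAny() will read, replay, and then delete the half-written file, losing edits. The fix writes each recovered-edits file under a '.temp' suffix, renames it to its final name only after the writer is closed, and makes HLog.getSplitEditFilesSorted() skip '.temp' files so an opening region never sees an in-progress file.

Below is a minimal sketch of that write-to-temp-then-rename pattern against Hadoop's FileSystem API; the class and method names are illustrative, not the HBase internals:

import java.io.IOException;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class TempThenRenameSketch {
  static final String TMP_SUFFIX = ".temp";

  /** Writes edits under a ".temp" name, then renames to the final name. */
  static Path writeRecoveredEdits(FileSystem fs, Path finalPath, byte[] edits)
      throws IOException {
    Path tmp = new Path(finalPath.getParent(), finalPath.getName() + TMP_SUFFIX);
    FSDataOutputStream out = fs.create(tmp);
    try {
      // Readers filter out ".temp" files, so a half-written file stays
      // invisible to a region that opens while the split is running.
      out.write(edits);
    } finally {
      out.close();
    }
    // A file already at the final name can be left over from an earlier
    // failed split attempt; remove it before renaming.
    if (fs.exists(finalPath) && !fs.delete(finalPath, false)) {
      throw new IOException("Failed deleting old " + finalPath);
    }
    // HDFS rename is atomic, so the file appears under its final name
    // all at once.
    if (!fs.rename(tmp, finalPath)) {
      throw new IOException("Failed renaming " + tmp + " to " + finalPath);
    }
    return finalPath;
  }
}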

CHANGES.txt

@@ -447,6 +447,8 @@ Release 0.92.0 - Unreleased
HBASE-4739 Master dying while going to close a region can leave it in transition
forever (Gao Jinchao)
HBASE-4855 SplitLogManager hangs on cluster restart due to batch.installed doubly counted
HBASE-4862 Splitting hlog and opening region concurrently may cause data loss
(Chunhui Shen)
TESTS
HBASE-4450 test for number of blocks read: to serve as baseline for expected

src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java

@@ -31,8 +31,8 @@ import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.NavigableSet;
import java.util.SortedMap;
@@ -126,6 +126,7 @@ public class HLog implements Syncable {
private static final String RECOVERED_EDITS_DIR = "recovered.edits";
private static final Pattern EDITFILES_NAME_PATTERN =
Pattern.compile("-?[0-9]+");
static final String RECOVERED_LOG_TMPFILE_SUFFIX = ".temp";
private final FileSystem fs;
private final Path dir;
@@ -1703,7 +1704,8 @@ public class HLog implements Syncable {
}
/**
* Returns sorted set of edit files made by wal-log splitter.
* Returns sorted set of edit files made by wal-log splitter, excluding files
* with '.temp' suffix.
* @param fs
* @param regiondir
* @return Files in passed <code>regiondir</code> as a sorted set.
@@ -1726,6 +1728,11 @@ public class HLog implements Syncable {
// it a timestamp suffix. See moveAsideBadEditsFile.
Matcher m = EDITFILES_NAME_PATTERN.matcher(p.getName());
result = fs.isFile(p) && m.matches();
// Skip files whose names end with RECOVERED_LOG_TMPFILE_SUFFIX: the
// suffix means the hlog-splitting thread is still writing the file.
if (p.getName().endsWith(RECOVERED_LOG_TMPFILE_SUFFIX)) {
result = false;
}
} catch (IOException e) {
LOG.warn("Failed isFile check on " + p);
}
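
Taken together, the filter above accepts only fully written edit files. As a standalone predicate it amounts to roughly the following sketch (assuming the same '.temp' constant; the class name is illustrative, and this is not the exact HBase code):

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class RecoveredEditsFilterSketch {
  static boolean acceptRecoveredEdits(FileSystem fs, Path p) throws IOException {
    // Reject in-progress files first: a ".temp" suffix means the
    // hlog-splitting thread has not finished writing this file.
    if (p.getName().endsWith(".temp")) {
      return false;
    }
    // Otherwise a candidate must be a plain file whose name is entirely
    // numeric (optionally negative), as EDITFILES_NAME_PATTERN requires.
    return fs.isFile(p) && p.getName().matches("-?[0-9]+");
  }
}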

src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogSplitter.java

@@ -458,6 +458,24 @@ public class HLogSplitter {
WriterAndPath wap = (WriterAndPath)o;
wap.w.close();
LOG.debug("Closed " + wap.p);
Path dst = getCompletedRecoveredEditsFilePath(wap.p);
if (!dst.equals(wap.p) && fs.exists(dst)) {
LOG.warn("Found existing old edits file. It could be the "
+ "result of a previous failed split attempt. Deleting " + dst
+ ", length=" + fs.getFileStatus(dst).getLen());
if (!fs.delete(dst, false)) {
LOG.warn("Failed deleting of old " + dst);
throw new IOException("Failed deleting of old " + dst);
}
}
// The temp file may not exist for unit tests whose splitter reads and
// writes data without touching disk; TestHLogSplit#testThreading is an
// example.
if (fs.exists(wap.p)) {
if (!fs.rename(wap.p, dst)) {
throw new IOException("Failed renaming " + wap.p + " to " + dst);
}
}
}
String msg = ("processed " + editsCount + " edits across " + n + " regions" +
" threw away edits for " + (logWriters.size() - n) + " regions" +
@@ -624,8 +642,30 @@ public class HLogSplitter {
if (isCreate && !fs.exists(dir)) {
if (!fs.mkdirs(dir)) LOG.warn("mkdir failed on " + dir);
}
return new Path(dir, formatRecoveredEditsFileName(logEntry.getKey()
.getLogSeqNum()));
// Give the file a name ending with RECOVERED_LOG_TMPFILE_SUFFIX so that
// the region's replayRecoveredEdits will not delete it while the
// splitter is still writing it
String fileName = formatRecoveredEditsFileName(logEntry.getKey()
.getLogSeqNum());
fileName = getTmpRecoveredEditsFileName(fileName);
return new Path(dir, fileName);
}
static String getTmpRecoveredEditsFileName(String fileName) {
return fileName + HLog.RECOVERED_LOG_TMPFILE_SUFFIX;
}
/**
* Convert a path to a file under the RECOVERED_EDITS_DIR directory,
* stripping RECOVERED_LOG_TMPFILE_SUFFIX if present.
* @param srcPath path that may carry RECOVERED_LOG_TMPFILE_SUFFIX
* @return the same path without RECOVERED_LOG_TMPFILE_SUFFIX
*/
static Path getCompletedRecoveredEditsFilePath(Path srcPath) {
String fileName = srcPath.getName();
if (fileName.endsWith(HLog.RECOVERED_LOG_TMPFILE_SUFFIX)) {
fileName = fileName.split(HLog.RECOVERED_LOG_TMPFILE_SUFFIX)[0];
}
return new Path(srcPath.getParent(), fileName);
}
static String formatRecoveredEditsFileName(final long seqid) {
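
A hypothetical use of the helper above (the path is illustrative; since the method is package-private, a real caller would live in org.apache.hadoop.hbase.regionserver.wal). One caveat worth noting: String.split() treats RECOVERED_LOG_TMPFILE_SUFFIX as a regex, in which '.' matches any character; that is safe here only because recovered-edits file names are purely numeric before the suffix.

import org.apache.hadoop.fs.Path;

public class CompletedPathExample {
  public static void main(String[] args) {
    Path tmp = new Path("/hbase/t1/r0/recovered.edits/0000000000000001000.temp");
    Path done = HLogSplitter.getCompletedRecoveredEditsFilePath(tmp);
    // Prints: /hbase/t1/r0/recovered.edits/0000000000000001000
    System.out.println(done);
  }
}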
@@ -1136,9 +1176,33 @@ public class HLogSplitter {
thrown.add(ioe);
continue;
}
paths.add(wap.p);
LOG.info("Closed path " + wap.p +" (wrote " + wap.editsWritten + " edits in "
+ (wap.nanosSpent / 1000/ 1000) + "ms)");
Path dst = getCompletedRecoveredEditsFilePath(wap.p);
try {
if (!dst.equals(wap.p) && fs.exists(dst)) {
LOG.warn("Found existing old edits file. It could be the "
+ "result of a previous failed split attempt. Deleting " + dst
+ ", length=" + fs.getFileStatus(dst).getLen());
if (!fs.delete(dst, false)) {
LOG.warn("Failed deleting of old " + dst);
throw new IOException("Failed deleting of old " + dst);
}
}
// The temp file may not exist for unit tests whose splitter reads and
// writes data without touching disk; TestHLogSplit#testThreading is an
// example.
if (fs.exists(wap.p)) {
if (!fs.rename(wap.p, dst)) {
throw new IOException("Failed renaming " + wap.p + " to " + dst);
}
}
} catch (IOException ioe) {
LOG.error("Couldn't rename " + wap.p + " to " + dst, ioe);
thrown.add(ioe);
continue;
}
paths.add(dst);
}
if (!thrown.isEmpty()) {
throw MultipleIOException.createIOException(thrown);
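
The delete-then-rename finalization now appears at two call sites (the writer-close path earlier in this file and the loop above). A shared helper, not part of this commit, could consolidate both; a sketch:

// Hypothetical helper consolidating the two finalization sites.
static Path finalizeRecoveredEdits(FileSystem fs, Path tmpPath) throws IOException {
  Path dst = getCompletedRecoveredEditsFilePath(tmpPath);
  // A file already at the final name may be left over from an earlier
  // failed split attempt; remove it before renaming.
  if (!dst.equals(tmpPath) && fs.exists(dst) && !fs.delete(dst, false)) {
    throw new IOException("Failed deleting of old " + dst);
  }
  // Unit-test splitters that never touch disk (e.g.
  // TestHLogSplit#testThreading) leave no temp file to rename.
  if (fs.exists(tmpPath) && !fs.rename(tmpPath, dst)) {
    throw new IOException("Failed renaming " + tmpPath + " to " + dst);
  }
  return dst;
}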

src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLogSplit.java

@@ -19,7 +19,11 @@
*/
package org.apache.hadoop.hbase.regionserver.wal;
import static org.junit.Assert.*;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.FileNotFoundException;
import java.io.IOException;
@@ -27,6 +31,7 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.NavigableSet;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
@@ -39,10 +44,16 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.wal.HLog.Entry;
import org.apache.hadoop.hbase.regionserver.wal.HLog.Reader;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CancelableProgressable;
import org.apache.hadoop.hbase.util.Threads;
@@ -1103,6 +1114,58 @@ public class TestHLogSplit {
assertEquals(1, fs.listStatus(corruptDir).length);
}
/**
* @throws IOException
* @see https://issues.apache.org/jira/browse/HBASE-4862
*/
@Test
public void testConcurrentSplitLogAndReplayRecoverEdit() throws IOException {
LOG.info("testConcurrentSplitLogAndReplayRecoverEdit");
// Generate hlogs for our destination region
String regionName = "r0";
final Path regiondir = new Path(tabledir, regionName);
regions = new ArrayList<String>();
regions.add(regionName);
generateHLogs(-1);
HLogSplitter logSplitter = new HLogSplitter(
conf, hbaseDir, hlogDir, oldLogDir, fs) {
protected HLog.Writer createWriter(FileSystem fs, Path logfile, Configuration conf)
throws IOException {
HLog.Writer writer = HLog.createWriter(fs, logfile, conf);
// After creating the writer, simulate the region's
// replayRecoveredEditsIfAny(), which collects this region's split edit
// files and deletes them, excluding files with the '.temp' suffix.
NavigableSet<Path> files = HLog.getSplitEditFilesSorted(this.fs,
regiondir);
if (files != null && !files.isEmpty()) {
for (Path file : files) {
if (!this.fs.delete(file, false)) {
LOG.error("Failed delete of " + file);
} else {
LOG.debug("Deleted recovered.edits file=" + file);
}
}
}
return writer;
}
};
try {
logSplitter.splitLog();
} catch (IOException e) {
LOG.info(e);
Assert.fail("Throws IOException when spliting "
+ "log, it is most likely because writing file does not "
+ "exist which is caused by concurrent replayRecoveredEditsIfAny()");
}
if (fs.exists(corruptDir)) {
if (fs.listStatus(corruptDir).length > 0) {
Assert.fail("There are some corrupt logs, "
+ "it is most likely caused by concurrent replayRecoveredEditsIfAny()");
}
}
}
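
The override above injects the concurrent replay at the worst possible moment: immediately after each writer is created, every recovered-edits file without the '.temp' suffix is deleted, so the split can only succeed if in-progress files are invisible to the replay path.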
private void flushToConsole(String s) {
System.out.println(s);
System.out.flush();