HBASE-7643 HFileArchiver.resolveAndArchive() race condition may lead to snapshot data loss (Matteo Bertozzi)
git-svn-id: https://svn.apache.org/repos/asf/hbase/branches/hbase-7290@1445861 13f79535-47bb-0310-9956-ffa450edef68
commit 69523a5ef8
parent d0906be22c
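The race: the archiver moves store files into the archive directory with a check-then-rename sequence, while the HFileCleaner concurrently removes archive directories; if the cleaner deletes the directory between the existence check and the rename, the move fails, and before this patch the failure handler deleted the store files anyway. Below is a minimal, self-contained sketch of the retry pattern the patch introduces; the class and method names are hypothetical, and only FileSystem, Path, and the retry count of 6 mirror the actual change:

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/** Hypothetical sketch of the HBASE-7643 retry pattern; not HBase code. */
public final class ArchiveMoveSketch {
  /** Mirrors DEFAULT_RETRIES_NUMBER added by the patch. */
  private static final int RETRIES = 6;

  /**
   * Move src to dest, re-creating dest's parent directory between attempts
   * in case a concurrent cleaner deleted it out from under us.
   * @return true once the rename succeeds, false after all retries fail
   */
  public static boolean moveWithRetries(FileSystem fs, Path src, Path dest) {
    boolean success = false;
    for (int i = 0; !success && i < RETRIES; ++i) {
      if (i > 0) {
        // A previous attempt failed, probably because the cleaner removed
        // the destination directory; try to put it back before retrying.
        try {
          Path dir = dest.getParent();
          if (!fs.exists(dir)) {
            fs.mkdirs(dir);
          }
        } catch (IOException e) {
          // Ignore: we're in a retry loop, the next rename will tell us.
        }
      }
      try {
        success = fs.rename(src, dest); // FileSystem.rename returns true on success
      } catch (IOException e) {
        success = false;
      }
    }
    return success;
  }
}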
@@ -55,6 +55,9 @@ public class HFileArchiver {
   private static final Log LOG = LogFactory.getLog(HFileArchiver.class);
   private static final String SEPARATOR = ".";
 
+  /** Number of retries in case of fs operation failure */
+  private static final int DEFAULT_RETRIES_NUMBER = 6;
+
   private HFileArchiver() {
     // hidden ctor since this is just a util
   }
@@ -133,6 +136,7 @@ public class HFileArchiver {
     try {
       success = resolveAndArchive(fs, regionArchiveDir, toArchive);
     } catch (IOException e) {
+      LOG.error("Failed to archive: " + toArchive, e);
       success = false;
     }
 
@@ -143,7 +147,7 @@ public class HFileArchiver {
     }
 
     throw new IOException("Received error when attempting to archive files (" + toArchive
-        + "), cannot delete region directory.");
+        + "), cannot delete region directory. ");
   }
 
   /**
@@ -273,14 +277,12 @@ public class HFileArchiver {
     long start = EnvironmentEdgeManager.currentTimeMillis();
     List<File> failures = resolveAndArchive(fs, baseArchiveDir, toArchive, start);
 
-    // clean out the failures by just deleting them
+    // Notify that some files were not archived.
+    // We can't delete the files, otherwise snapshots or other backup systems
+    // that rely on the archiver would end up with data loss.
    if (failures.size() > 0) {
-      try {
-        LOG.error("Failed to complete archive, deleting extra store files.");
-        deleteFilesWithoutArchiving(failures);
-      } catch (IOException e) {
-        LOG.warn("Failed to delete store file(s) when archiving failed", e);
-      }
+      LOG.warn("Failed to complete archive of: " + failures +
+        ". Those files are still in the original location, and they may slow down reads.");
       return false;
     }
     return true;
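Note that this hunk inverts the old failure handling: instead of deleting store files that could not be archived, which is exactly how a snapshot or any other archive-based backup loses data, the archiver now leaves them in their original location and only warns. The cost, as the new comment says, is a possible read slowdown until a later archive attempt succeeds.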
@@ -383,19 +385,48 @@ public class HFileArchiver {
         if (!fs.delete(archiveFile, false)) {
           throw new IOException("Couldn't delete existing archive file (" + archiveFile
               + ") or rename it to the backup file (" + backedupArchiveFile
-              + ")to make room for similarly named file.");
+              + ") to make room for similarly named file.");
         }
       }
       LOG.debug("Backed up archive file from: " + archiveFile);
     }
 
-    LOG.debug("No existing file in archive for:" + archiveFile + ", free to archive original file.");
+    LOG.debug("No existing file in archive for:" + archiveFile +
+        ", free to archive original file.");
 
     // at this point, we should have a free spot for the archive file
-    if (currentFile.moveAndClose(archiveFile)) {
+    boolean success = false;
+    for (int i = 0; !success && i < DEFAULT_RETRIES_NUMBER; ++i) {
+      if (i > 0) {
+        // Ensure that the archive directory exists.
+        // The previous "move to archive" operation probably failed because
+        // the cleaner removed our archive directory (HBASE-7643).
+        // (We're in a retry loop, so don't worry too much about the exception.)
+        try {
+          if (!fs.exists(archiveDir)) {
+            if (fs.mkdirs(archiveDir)) {
+              LOG.debug("Created archive directory:" + archiveDir);
+            }
+          }
+        } catch (IOException e) {
+          LOG.warn("Failed to create the archive directory: " + archiveDir, e);
+        }
+      }
+
+      try {
+        success = currentFile.moveAndClose(archiveFile);
+      } catch (IOException e) {
+        LOG.warn("Failed to archive file: " + currentFile + " on try #" + i, e);
+        success = false;
+      }
+    }
+
+    if (!success) {
       LOG.error("Failed to archive file:" + currentFile);
       return false;
-    } else if (LOG.isDebugEnabled()) {
+    }
+
+    if (LOG.isDebugEnabled()) {
       LOG.debug("Finished archiving file from: " + currentFile + ", to: " + archiveFile);
     }
     return true;
@@ -572,7 +603,7 @@ public class HFileArchiver {
     public boolean moveAndClose(Path dest) throws IOException {
       this.close();
       Path p = this.getPath();
-      return !fs.rename(p, dest);
+      return fs.rename(p, dest);
     }
 
     /**
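FileSystem.rename() returns true on success, so the old "return !fs.rename(p, dest)" made moveAndClose() report true on failure; the old call site above ("if (currentFile.moveAndClose(archiveFile)) { LOG.error(...); return false; }") compensated for that inversion. With the new retry loop assigning "success = currentFile.moveAndClose(archiveFile)", the method must report true on success, hence the flip here.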
@@ -36,13 +36,18 @@ import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.MediumTests;
+import org.apache.hadoop.hbase.Stoppable;
+import org.apache.hadoop.hbase.backup.HFileArchiver;
 import org.apache.hadoop.hbase.client.HBaseAdmin;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.master.MasterFileSystem;
+import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.HFileArchiveTestingUtil;
 import org.apache.hadoop.hbase.util.HFileArchiveUtil;
+import org.apache.hadoop.hbase.util.StoppableImplementation;
 import org.junit.After;
 import org.junit.AfterClass;
 import org.junit.Assert;
@@ -314,6 +319,69 @@ public class TestHFileArchiving {
         archivedFiles.containsAll(storeFiles));
   }
 
+  /**
+   * Test HFileArchiver.resolveAndArchive() race condition HBASE-7643
+   */
+  @Test
+  public void testCleaningRace() throws Exception {
+    final long TEST_TIME = 20 * 1000;
+
+    Configuration conf = UTIL.getMiniHBaseCluster().getMaster().getConfiguration();
+    Path rootDir = UTIL.getDataTestDir("testCleaningRace");
+    FileSystem fs = UTIL.getTestFileSystem();
+
+    Path archiveDir = new Path(rootDir, HConstants.HFILE_ARCHIVE_DIRECTORY);
+    Path regionDir = new Path("table", "abcdef");
+    Path familyDir = new Path(regionDir, "cf");
+
+    Path sourceRegionDir = new Path(rootDir, regionDir);
+    fs.mkdirs(sourceRegionDir);
+
+    Stoppable stoppable = new StoppableImplementation();
+
+    // The cleaner should be looping without long pauses to reproduce the race condition.
+    HFileCleaner cleaner = new HFileCleaner(1, stoppable, conf, fs, archiveDir);
+    try {
+      cleaner.start();
+
+      // Keep creating/archiving new files while the cleaner is running in the other thread.
+      long startTime = System.currentTimeMillis();
+      for (long fid = 0; (System.currentTimeMillis() - startTime) < TEST_TIME; ++fid) {
+        Path file = new Path(familyDir, String.valueOf(fid));
+        Path sourceFile = new Path(rootDir, file);
+        Path archiveFile = new Path(archiveDir, file);
+
+        fs.createNewFile(sourceFile);
+
+        try {
+          // Try to archive the file.
+          HFileArchiver.archiveRegion(fs, rootDir,
+              sourceRegionDir.getParent(), sourceRegionDir);
+
+          // The archiver succeeded: the file is no longer in the original location
+          // but it is in the archive location.
+          LOG.debug("hfile=" + fid + " should be in the archive");
+          assertTrue(fs.exists(archiveFile));
+          assertFalse(fs.exists(sourceFile));
+        } catch (IOException e) {
+          // The archiver was unable to archive the file, probably the HBASE-7643
+          // race condition. In this case the file should not be archived, and we
+          // should still have it in the original location.
+          LOG.debug("hfile=" + fid + " should be in the source location");
+          assertFalse(fs.exists(archiveFile));
+          assertTrue(fs.exists(sourceFile));
+
+          // Avoid having this file in the next run.
+          fs.delete(sourceFile, false);
+        }
+      }
+    } finally {
+      stoppable.stop("test end");
+      cleaner.join();
+      fs.delete(rootDir, true);
+    }
+  }
+
 private void clearArchiveDirectory() throws IOException {
   UTIL.getTestFileSystem().delete(new Path(UTIL.getDefaultRootDirPath(), ".archive"), true);
 }
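The test is deliberately tolerant of both interleavings: each iteration asserts only that the file ends up in exactly one of the two locations (the archive on success, the source on an IOException), never in neither, which is precisely the data-loss symptom of HBASE-7643.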